{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5fbed3d5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "所有包导入完成！\n"
     ]
    }
   ],
   "source": [
    "# 基础工具包\n",
    "import time, math, os\n",
    "from tqdm import tqdm\n",
    "import gc\n",
    "import pickle\n",
    "import random\n",
    "from datetime import datetime\n",
    "from operator import itemgetter\n",
    "\n",
    "# 数据处理包\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt  # 添加matplotlib导入\n",
    "\n",
    "# 其他工具包\n",
    "import warnings\n",
    "from collections import defaultdict\n",
    "import collections\n",
    "\n",
    "# 忽略警告信息\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# 设置数据路径\n",
    "data_path = './Data/'  # 本地路径\n",
    "save_path = './temp_results/'  # 结果保存路径\n",
    "\n",
    "# 创建保存结果的目录\n",
    "if not os.path.exists(save_path):\n",
    "    os.makedirs(save_path)\n",
    "    print(f'创建结果保存目录：{save_path}')\n",
    "\n",
    "print('所有包导入完成！')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9f91743",
   "metadata": {},
   "source": [
    "# 内存优化函数\n",
    "\n",
    "该函数用于优化DataFrame的内存使用，通过以下方式实现：\n",
    "1. 分析每一列的数据类型和取值范围\n",
    "2. 对于整数类型，选择最小的可用整数类型(int8/16/32/64)\n",
    "3. 对于浮点数，选择最小的可用浮点类型(float16/32/64)\n",
    "4. 计算并显示内存减少比例和处理时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c682718f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-- Mem. usage decreased to  0.00 Mb (74.2% reduction),time spend:0.00 min\n",
      "\n",
      "各列的数据类型:\n",
      " int_col         int8\n",
      "float_col    float16\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "def reduce_mem(df):\n",
    "    \"\"\"优化DataFrame的内存使用\n",
    "    \n",
    "    Args:\n",
    "        df (pd.DataFrame): 输入的DataFrame\n",
    "        \n",
    "    Returns:\n",
    "        pd.DataFrame: 优化后的DataFrame\n",
    "    \"\"\"\n",
    "    starttime = time.time()\n",
    "    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']\n",
    "    start_mem = df.memory_usage().sum() / 1024**2\n",
    "    \n",
    "    for col in df.columns:\n",
    "        col_type = df[col].dtypes\n",
    "        if col_type in numerics:\n",
    "            c_min = df[col].min()\n",
    "            c_max = df[col].max()\n",
    "            if pd.isnull(c_min) or pd.isnull(c_max):\n",
    "                continue\n",
    "            \n",
    "            if str(col_type)[:3] == 'int':\n",
    "                # 整数类型优化\n",
    "                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:\n",
    "                    df[col] = df[col].astype(np.int8)\n",
    "                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:\n",
    "                    df[col] = df[col].astype(np.int16)\n",
    "                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:\n",
    "                    df[col] = df[col].astype(np.int32)\n",
    "                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:\n",
    "                    df[col] = df[col].astype(np.int64)\n",
    "            else:\n",
    "                # 浮点数类型优化\n",
    "                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:\n",
    "                    df[col] = df[col].astype(np.float16)\n",
    "                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:\n",
    "                    df[col] = df[col].astype(np.float32)\n",
    "                else:\n",
    "                    df[col] = df[col].astype(np.float64)\n",
    "    \n",
    "    end_mem = df.memory_usage().sum() / 1024**2\n",
    "    print('-- Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction),time spend:{:2.2f} min'.format(\n",
    "        end_mem,\n",
    "        100 * (start_mem - end_mem) / start_mem,\n",
    "        (time.time() - starttime) / 60\n",
    "    ))\n",
    "    \n",
    "    return df\n",
    "\n",
    "# 测试函数\n",
    "if __name__ == \"__main__\":\n",
    "    # 创建一个测试DataFrame\n",
    "    test_df = pd.DataFrame({\n",
    "        'int_col': np.random.randint(-100, 100, size=1000),\n",
    "        'float_col': np.random.randn(1000)\n",
    "    })\n",
    "    \n",
    "    # 调用内存优化函数\n",
    "    test_df = reduce_mem(test_df)\n",
    "    print(\n",
    "        \"\\n各列的数据类型:\\n\",\n",
    "        test_df.dtypes\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0cb30aa",
   "metadata": {},
   "source": [
    "# 数据读取与采样函数\n",
    "\n",
    "实现以下功能：\n",
    "1. `get_all_click_sample`: 从训练集中采样部分用户数据进行调试\n",
    "2. `get_all_click_df`: 读取全量数据，支持线上/线下模式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "aee60b73",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_all_click_sample(data_path, sample_nums=10000):\n",
    "    \"\"\"从训练集中采样一部分数据调试\n",
    "    \n",
    "    Args:\n",
    "        data_path: 原数据的存储路径\n",
    "        sample_nums: 采样数目（这里由于机器的内存限制，可以采样用户做）\n",
    "    \"\"\"\n",
    "    # 读取训练集点击数据\n",
    "    all_click = pd.read_csv(data_path + 'train_click_log.csv')\n",
    "    \n",
    "    # 采样用户\n",
    "    all_user_ids = all_click.user_id.unique()\n",
    "    sample_user_ids = np.random.choice(all_user_ids, size=sample_nums, replace=False)\n",
    "    \n",
    "    # 筛选数据\n",
    "    all_click = all_click[all_click['user_id'].isin(sample_user_ids)]\n",
    "    all_click = all_click.drop_duplicates(['user_id', 'click_article_id', 'click_timestamp'])\n",
    "    \n",
    "    return all_click"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "59d9a4e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_all_click_df(data_path='./data_raw/', offline=True):\n",
    "    \"\"\"读取点击数据，这里分成线上和线下模式\n",
    "    \n",
    "    Args:\n",
    "        data_path: 数据路径\n",
    "        offline: 是否为线下模式\n",
    "            - True: 只使用训练集，用于线下验证\n",
    "            - False: 合并训练集和测试集，用于生成提交结果\n",
    "    \"\"\"\n",
    "    if offline:\n",
    "        # 线下模式，只读取训练集\n",
    "        all_click = pd.read_csv(data_path + 'train_click_log.csv')\n",
    "    else:\n",
    "        # 线上模式，合并训练集和测试集\n",
    "        trn_click = pd.read_csv(data_path + 'train_click_log.csv')\n",
    "        tst_click = pd.read_csv(data_path + 'testA_click_log.csv')\n",
    "        all_click = pd.concat([trn_click, tst_click])\n",
    "    \n",
    "    # 去除重复数据\n",
    "    all_click = all_click.drop_duplicates(['user_id', 'click_article_id', 'click_timestamp'])\n",
    "    return all_click"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "fe3024f9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "正在读取全量训练数据...\n",
      "正在进行内存优化...\n",
      "-- Mem. usage decreased to 46.65 Mb (62.5% reduction),time spend:0.00 min\n",
      "\n",
      "数据基本信息：\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 1630633 entries, 0 to 518009\n",
      "Data columns (total 9 columns):\n",
      " #   Column               Non-Null Count    Dtype\n",
      "---  ------               --------------    -----\n",
      " 0   user_id              1630633 non-null  int32\n",
      " 1   click_article_id     1630633 non-null  int32\n",
      " 2   click_timestamp      1630633 non-null  int64\n",
      " 3   click_environment    1630633 non-null  int8 \n",
      " 4   click_deviceGroup    1630633 non-null  int8 \n",
      " 5   click_os             1630633 non-null  int8 \n",
      " 6   click_country        1630633 non-null  int8 \n",
      " 7   click_region         1630633 non-null  int8 \n",
      " 8   click_referrer_type  1630633 non-null  int8 \n",
      "dtypes: int32(2), int64(1), int8(6)\n",
      "memory usage: 46.7 MB\n",
      "None\n",
      "\n",
      "数据前5条记录：\n",
      "   user_id  click_article_id  click_timestamp  click_environment  \\\n",
      "0   199999            160417    1507029570190                  4   \n",
      "1   199999              5408    1507029571478                  4   \n",
      "2   199999             50823    1507029601478                  4   \n",
      "3   199998            157770    1507029532200                  4   \n",
      "4   199998             96613    1507029671831                  4   \n",
      "\n",
      "   click_deviceGroup  click_os  click_country  click_region  \\\n",
      "0                  1        17              1            13   \n",
      "1                  1        17              1            13   \n",
      "2                  1        17              1            13   \n",
      "3                  1        17              1            25   \n",
      "4                  1        17              1            25   \n",
      "\n",
      "   click_referrer_type  \n",
      "0                    1  \n",
      "1                    1  \n",
      "2                    1  \n",
      "3                    5  \n",
      "4                    5  \n",
      "\n",
      "基本统计信息：\n",
      "总记录数：1630633\n",
      "独立用户数：250000\n",
      "独立文章数：35380\n",
      "正在进行内存优化...\n",
      "-- Mem. usage decreased to 46.65 Mb (62.5% reduction),time spend:0.00 min\n",
      "\n",
      "数据基本信息：\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 1630633 entries, 0 to 518009\n",
      "Data columns (total 9 columns):\n",
      " #   Column               Non-Null Count    Dtype\n",
      "---  ------               --------------    -----\n",
      " 0   user_id              1630633 non-null  int32\n",
      " 1   click_article_id     1630633 non-null  int32\n",
      " 2   click_timestamp      1630633 non-null  int64\n",
      " 3   click_environment    1630633 non-null  int8 \n",
      " 4   click_deviceGroup    1630633 non-null  int8 \n",
      " 5   click_os             1630633 non-null  int8 \n",
      " 6   click_country        1630633 non-null  int8 \n",
      " 7   click_region         1630633 non-null  int8 \n",
      " 8   click_referrer_type  1630633 non-null  int8 \n",
      "dtypes: int32(2), int64(1), int8(6)\n",
      "memory usage: 46.7 MB\n",
      "None\n",
      "\n",
      "数据前5条记录：\n",
      "   user_id  click_article_id  click_timestamp  click_environment  \\\n",
      "0   199999            160417    1507029570190                  4   \n",
      "1   199999              5408    1507029571478                  4   \n",
      "2   199999             50823    1507029601478                  4   \n",
      "3   199998            157770    1507029532200                  4   \n",
      "4   199998             96613    1507029671831                  4   \n",
      "\n",
      "   click_deviceGroup  click_os  click_country  click_region  \\\n",
      "0                  1        17              1            13   \n",
      "1                  1        17              1            13   \n",
      "2                  1        17              1            13   \n",
      "3                  1        17              1            25   \n",
      "4                  1        17              1            25   \n",
      "\n",
      "   click_referrer_type  \n",
      "0                    1  \n",
      "1                    1  \n",
      "2                    1  \n",
      "3                    5  \n",
      "4                    5  \n",
      "\n",
      "基本统计信息：\n",
      "总记录数：1630633\n",
      "独立用户数：250000\n",
      "独立文章数：35380\n"
     ]
    }
   ],
   "source": [
    "# 读取全量训练数据（包含测试集）\n",
    "print('正在读取全量训练数据...')\n",
    "all_click_df = get_all_click_df(data_path, offline=False)\n",
    "\n",
    "# 对数据进行内存优化\n",
    "print('正在进行内存优化...')\n",
    "all_click_df = reduce_mem(all_click_df)\n",
    "\n",
    "# 查看数据基本信息\n",
    "print('\\n数据基本信息：')\n",
    "print(all_click_df.info())\n",
    "\n",
    "# 查看数据示例\n",
    "print('\\n数据前5条记录：')\n",
    "print(all_click_df.head())\n",
    "\n",
    "# 统计信息\n",
    "print('\\n基本统计信息：')\n",
    "print(f'总记录数：{len(all_click_df)}')\n",
    "print(f'独立用户数：{all_click_df[\"user_id\"].nunique()}')\n",
    "print(f'独立文章数：{all_click_df[\"click_article_id\"].nunique()}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "019f7193",
   "metadata": {},
   "source": [
    "# 点击序列处理\n",
    "\n",
    "实现以下功能：\n",
    "1. 将用户的点击数据转换为时序序列\n",
    "2. 输出格式：`{user1: [(item1, time1), (item2, time2)...]...}`\n",
    "3. 按时间戳排序，保证序列的时间顺序性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "56b16f64",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "处理用户点击序列...\n",
      "\n",
      "第一个用户的点击序列示例：\n",
      "\n",
      "用户ID 0 的点击序列：\n",
      "\t文章ID: 30760, 时间戳: 1508211672520\n",
      "\t文章ID: 157507, 时间戳: 1508211702520\n",
      "\n",
      "基本统计信息：\n",
      "用户数量：250000\n",
      "平均序列长度：6.52\n",
      "最短序列长度：1\n",
      "最长序列长度：938\n",
      "\n",
      "第一个用户的点击序列示例：\n",
      "\n",
      "用户ID 0 的点击序列：\n",
      "\t文章ID: 30760, 时间戳: 1508211672520\n",
      "\t文章ID: 157507, 时间戳: 1508211702520\n",
      "\n",
      "基本统计信息：\n",
      "用户数量：250000\n",
      "平均序列长度：6.52\n",
      "最短序列长度：1\n",
      "最长序列长度：938\n"
     ]
    }
   ],
   "source": [
    "def get_user_item_time(click_df):\n",
    "    \"\"\"根据点击时间获取用户的点击文章序列\n",
    "    \n",
    "    Args:\n",
    "        click_df (pd.DataFrame): 包含用户点击数据的DataFrame\n",
    "            必须包含列：'user_id', 'click_article_id', 'click_timestamp'\n",
    "    \n",
    "    Returns:\n",
    "        dict: 用户点击序列字典\n",
    "            {\n",
    "                user_id1: [(item_id1, timestamp1), (item_id2, timestamp2), ...],\n",
    "                user_id2: [(item_id1, timestamp1), ...]\n",
    "            }\n",
    "    \"\"\"\n",
    "    # 1. 按时间戳排序\n",
    "    click_df = click_df.sort_values('click_timestamp')\n",
    "    \n",
    "    # 2. 定义将DataFrame转换为列表的函数\n",
    "    def make_item_time_pair(df):\n",
    "        return list(zip(df['click_article_id'], df['click_timestamp']))\n",
    "    \n",
    "    # 3. 对每个用户的数据进行处理\n",
    "    user_item_time_df = click_df.groupby('user_id')[['click_article_id', 'click_timestamp']].apply(\n",
    "        lambda x: make_item_time_pair(x)\n",
    "    ).reset_index().rename(columns={0: 'item_time_list'})\n",
    "    \n",
    "    # 4. 转换为字典格式\n",
    "    user_item_time_dict = dict(zip(user_item_time_df['user_id'], user_item_time_df['item_time_list']))\n",
    "    \n",
    "    return user_item_time_dict\n",
    "\n",
    "# 测试函数\n",
    "print('处理用户点击序列...')\n",
    "user_item_time_dict = get_user_item_time(all_click_df)\n",
    "\n",
    "# 显示示例\n",
    "print('\\n第一个用户的点击序列示例：')\n",
    "first_user_id = list(user_item_time_dict.keys())[0]\n",
    "print(f'\\n用户ID {first_user_id} 的点击序列：')\n",
    "for item_id, timestamp in user_item_time_dict[first_user_id][:5]:  # 只显示前5条记录\n",
    "    print(f'\\t文章ID: {item_id}, 时间戳: {timestamp}')\n",
    "\n",
    "# 统计信息\n",
    "print('\\n基本统计信息：')\n",
    "print(f'用户数量：{len(user_item_time_dict)}')\n",
    "seq_lens = [len(item_list) for item_list in user_item_time_dict.values()]\n",
    "print(f'平均序列长度：{np.mean(seq_lens):.2f}')\n",
    "print(f'最短序列长度：{min(seq_lens)}')\n",
    "print(f'最长序列长度：{max(seq_lens)}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c755be38",
   "metadata": {},
   "source": [
    "# 热门文章分析\n",
    "\n",
    "实现以下功能：\n",
    "1. 获取点击量最高的TopK文章\n",
    "2. 统计文章点击分布\n",
    "3. 可视化点击量分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "b902a583",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "分析热门文章...\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABZ7UlEQVR4nO3de5hVdb0/8PdwR4JRIMBJvJRkKuQFFQFTvIEmkvkrLWqK4niJlFDQ4yULTcDwfqCLqamJSp1TWmmSWKkRXlFUvGAXUAwQExgEkev+/dFhH0dQQWENMK/X88yj+7s+a+/PYs3as+c937VWRalUKgUAAAAACtSgrhsAAAAAoP4RSgEAAABQOKEUAAAAAIUTSgEAAABQOKEUAAAAAIUTSgEAAABQOKEUAAAAAIUTSgEAAABQOKEUAAAAAIVrVNcNAACsj+eeey7du3d/15o///nP+dSnPvWuNY8++mhWrlz5ns9VH+s6depUfnzRRRdl/PjxmTZtWr7+9a/npptuetf1k+SrX/1qbrzxxvesWx+LFi3KmDFjMnHixDz//PNZvHhxdtlll3z5y1/Ot771rTRr1qxW/YoVKzJy5MjccMMNmTNnTnbZZZd885vfzOmnn16rrrq6Oq+//nruuOOOjdInAPD+CaUAgC3CqlWr0rlz50yaNGmdyw866KBUVFS8Z82qVavW67nqY90as2fPzujRo3PjjTemQYMGueCCC3LqqaeWlz/++OP55je/mZEjR+bQQw8tj3/4wx9e5/O/Hy+99FKuuuqqVFdX58wzz8yHPvSh/PnPf87w4cMzceLETJw4MRUVFeX6QYMG5eabb873vve97L///vn973+fb33rW3n99ddz3nnnleuGDx+eT3ziE/njH/+Yww47bKP1CwBsOKEUAAC1XH311dl2221z/PHHJ0k+9rGP5WMf+1h5+Ztvvpkk6dSpUw488MBN0sMuu+ySmTNnpkWLFuWxww47LC1atMhZZ52Vv/zlLznooIOSJM8880yuv/76jBgxImeddVaSpFevXnnttddy8cUX59RTT03r1q3L23LUUUflkksuEUoBQB1zTSkAAMqWL1+e66+/Pv3790+DBhv2UfGnP/1p9tprrzRr1iytW7fOZz/72Tz33HO1agYMGJAPfehDeeaZZ3L44YenRYsW+fCHP5zTTjstb7zxRrmuRYsWtQKpNQ444IAkyaxZs8pjd9xxR0qlUr72ta/Vqv3a176WpUuXZsKECbXGq6urc++99+bvf//7Bm0fALBxCaUAACh7+OGH89prr9U6LW99jBo1KgMHDsyee+6ZX/3qV7n66qvz1FNPpXv37vnrX/9aq3bFihX59Kc/ncMPPzx33HFHTjvttFxzzTU58cQT3/N1/vjHPyZJ9txzz/LYtGnT8uEPfzgdOnSoVfvJT36yvPytevXqlVKplN/97ncbtI0AwMbl9D0AAMoefPDBJMm+++673ussXLgw3/ve9/LpT386t956a3m8V69e6dSpU4YPH55bbrmlPL58+fIMHTo0gwcPTpIceeSRady4cc4///z85S9/Sc+ePdf5Ok899VRGjx6dz372s+XAKUlee+218ul5b9WiRYs0adIkr732Wq3xdu3a5SMf+Uj+8pe/rHUhdACgOGZKAQBQNnv27FRUVKRt27brvc6DDz6YpUuXZsCAAbXGO3bsmMMOOyx/+MMf1lrnS1/6Uq3H/fv3T5L86U9/WudrzJw5M3379k3Hjh1z3XXXrbX8rRc9X59l7dq1yz//+c93XAcA2PSEUgAAlC1dujSNGzdOw4YN13udNTORtt9++7WWVVVVrTVTqVGjRmnTpk2tsTWn3r29NklefPHFHHrooWnUqFH+8Ic/rDUrqk2bNutcb8mSJVm+fPk6Z1E1a9YsS5cufY8tAwA2JaEUAABlbdu2zfLly7NkyZL1XmdNwDRnzpy1ls2ePXutWVcrV65cK0Sa
O3dureda48UXXyxfA+pPf/pTdthhh7Veo0uXLnn11VfLz7HG008/nSTp3LnzWuvMnz9/g2aDAQAbn1AKAICyT3ziE0myQXem6969e5o3b55x48bVGn/55Zfzxz/+MYcffvha67z1GlNJytei6tWrV3nspZdeSq9evbJq1ar88Y9/zE477bTO1//MZz6TioqK3HTTTbXGb7zxxjRv3jxHHXVUrfGVK1dm1qxZ2WOPPdZ7GwGAjc+FzgEAKFsTCj300EO1Lib+brbddttccMEFOe+88/KVr3wlX/ziF/Paa6/lwgsvTLNmzfLd7363Vn2TJk1y+eWXZ/Hixdl///0zefLkXHzxxTn66KNz0EEHJUnmzZuXQw89NHPmzMn111+fefPmZd68eeXn2GGHHcqzpvbcc88MHDgw3/3ud9OwYcPsv//+ueeee/KTn/wkF1988Vqn7z311FN54403NvgOgwDAxiWUAgCgrGPHjvnUpz6VX//61zn55JPXe71zzz037dq1y3/913/l5z//eZo3b55evXpl5MiR6dSpU63axo0b584778zgwYNz8cUXp3nz5jnppJNy6aWXlmueffbZ/OMf/0iSfPnLX17r9b773e9m+PDh5cc//OEP85GPfCRjxozJ3Llzs/POO+fqq69e59317rjjjrRt2za9e/de7+0DADY+oRQAALV861vfyoknnph//vOf+chHPrLW8jXXeHq7gQMHZuDAgev1Gl26dHnHO+2922u8k8aNG2f48OG1gqp1WbVqVW6++eacfPLJadq06Xo/PwCw8bmmFAAAtRx//PHZf//9M2rUqLpuZaMbN25cFi9enLPOOquuWwGAes9MKQBgi9CwYcM8+eST2Xbbbde5fNWqVVm1atV71jRo0GC9nqs+1q1RUVGRa6+9Nr/5zW+yevXqWsu2dKtXr84tt9zyjv8WAEBxKkobMi8aAAAAADaCrefPXgAAAABsMYRSAAAAABROKAUAAABA4VzofCNavXp1Zs+enZYtW6aioqKu2wEAAAAoXKlUyuuvv56qqqp3vWGKUGojmj17djp27FjXbQAAAADUuVmzZmWHHXZ4x+VCqY2oZcuWSf79j96qVas67gYAAACgeIsWLUrHjh3LOck7EUptRGtO2WvVqpVQCgAAAKjX3uvSRi50DgAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFK5OQ6kHHnggxx57bKqqqlJRUZE77rhjrZrnnnsu/fr1S2VlZVq2bJkDDzwwL730Unn5smXLcvrpp6dt27Zp0aJF+vXrl5dffrnWcyxYsCDV1dWprKxMZWVlqqurs3Dhwlo1L730Uo499ti0aNEibdu2zeDBg7N8+fJNsdkAAAAA9V6dhlJLlizJXnvtlbFjx65z+d///vccdNBB+cQnPpH77rsvTz75ZC644II0a9asXDNkyJDcfvvtGT9+fCZNmpTFixenb9++WbVqVbmmf//+mTp1aiZMmJAJEyZk6tSpqa6uLi9ftWpVjjnmmCxZsiSTJk3K+PHj88tf/jJDhw7ddBsPAAAAUI9VlEqlUl03kSQVFRW5/fbbc9xxx5XHvvCFL6Rx48a5+eab17lOTU1NPvzhD+fmm2/OiSeemCSZPXt2OnbsmN/97nfp06dPnnvuueyxxx556KGH0q1btyTJQw89lO7du+f555/Pbrvtlrvvvjt9+/bNrFmzUlVVlSQZP358BgwYkHnz5qVVq1brtQ2LFi1KZWVlampq1nsdAAAAgK3J+uYjm+01pVavXp277rorH//4x9OnT5+0a9cu3bp1q3WK35QpU7JixYr07t27PFZVVZXOnTtn8uTJSZIHH3wwlZWV5UAqSQ488MBUVlbWquncuXM5kEqSPn36ZNmyZZky
Zcom3lIAAACA+mezDaXmzZuXxYsX55JLLslRRx2Ve+65J5/97Gdz/PHH5/7770+SzJ07N02aNMl2221Xa9327dtn7ty55Zp27dqt9fzt2rWrVdO+fftay7fbbrs0adKkXLMuy5Yty6JFi2p9AQAAAPDeGtV1A+9k9erVSZLPfOYzOeOMM5Ike++9dyZPnpwf//jHOeSQQ95x3VKplIqKivLjt/7/B6l5u1GjRuXCCy98740BAAAAoJbNdqZU27Zt06hRo+yxxx61xnfffffy3fc6dOiQ5cuXZ8GCBbVq5s2bV5751KFDh7zyyitrPf+rr75aq+btM6IWLFiQFStWrDWD6q3OPffc1NTUlL9mzZq14RsKAAAAUA9ttjOlmjRpkv333z/Tp0+vNf7CCy9kp512SpJ07do1jRs3zsSJE3PCCSckSebMmZNp06Zl9OjRSZLu3bunpqYmjzzySA444IAkycMPP5yampr06NGjXDNixIjMmTMn22+/fZLknnvuSdOmTdO1a9d37LFp06Zp2rTpxt3wzcDO59xV1y28p5mXHFPXLQAAAAAfQJ2GUosXL87f/va38uMZM2Zk6tSpad26dXbcccecddZZOfHEE3PwwQfn0EMPzYQJE/Lb3/429913X5KksrIyAwcOzNChQ9OmTZu0bt06w4YNS5cuXXLEEUck+ffMqqOOOionnXRSrrnmmiTJySefnL59+2a33XZLkvTu3Tt77LFHqqurc+mll2b+/PkZNmxYTjrpJHfRAwAAANgE6vT0vcceeyz77LNP9tlnnyTJmWeemX322Sff+c53kiSf/exn8+Mf/zijR49Oly5dct111+WXv/xlDjrooPJzXHnllTnuuONywgknpGfPntlmm23y29/+Ng0bNizX3HLLLenSpUt69+6d3r1755Of/GRuvvnm8vKGDRvmrrvuSrNmzdKzZ8+ccMIJOe6443LZZZcV9C8BAAAAUL9UlEqlUl03sbVYtGhRKisrU1NTs0XPsHL6HgAAAPB+rW8+stle6BwAAACArZdQCgAAAIDCCaUAAAAAKFyd3n0PNrXN/fpYro0FAABAfWWmFAAAAACFE0oBAAAAUDihFAAAAACFE0oBAAAAUDihFAAAAACFE0oBAAAAUDihFAAAAACFE0oBAAAAULhGdd0A8N52Pueuum7hXc285Ji6bgEAAIAtjJlSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABTO3feAwmzudxFM3EkQAACgKGZKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFC4Og2lHnjggRx77LGpqqpKRUVF7rjjjnesPeWUU1JRUZGrrrqq1viyZcty+umnp23btmnRokX69euXl19+uVbNggULUl1dncrKylRWVqa6ujoLFy6sVfPSSy/l2GOPTYsWLdK2bdsMHjw4y5cv30hbCgAAAMBb1WkotWTJkuy1114ZO3bsu9bdcccdefjhh1NVVbXWsiFDhuT222/P+PHjM2nSpCxevDh9+/bNqlWryjX9+/fP1KlTM2HChEyYMCFTp05NdXV1efmqVatyzDHHZMmSJZk0aVLGjx+fX/7ylxk6dOjG21gAAAAAyhrV5YsfffTROfroo9+15p///GdOO+20/P73v88xxxxTa1lNTU2uv/763HzzzTniiCOSJOPGjUvHjh1z7733pk+fPnnuuecyYcKEPPTQQ+nWrVuS5Nprr0337t0z
ffr07Lbbbrnnnnvy7LPPZtasWeXg6/LLL8+AAQMyYsSItGrVahNsPQAAAED9tVlfU2r16tWprq7OWWedlT333HOt5VOmTMmKFSvSu3fv8lhVVVU6d+6cyZMnJ0kefPDBVFZWlgOpJDnwwANTWVlZq6Zz5861ZmL16dMny5Yty5QpU96xv2XLlmXRokW1vgAAAAB4b3U6U+q9fP/730+jRo0yePDgdS6fO3dumjRpku22267WePv27TN37txyTbt27dZat127drVq2rdvX2v5dtttlyZNmpRr1mXUqFG58MILN2ibgK3DzufcVdctvKuZlxzz3kXZerYDAADY8my2M6WmTJmSq6++OjfeeGMqKio2aN1SqVRrnXWt/35q3u7cc89NTU1N+WvWrFkb1CcAAABAfbXZhlJ//vOfM2/evOy4445p1KhRGjVqlBdffDFDhw7NzjvvnCTp0KFDli9fngULFtRad968eeWZTx06dMgrr7yy1vO/+uqrtWrePiNqwYIFWbFixVozqN6qadOmadWqVa0vAAAAAN7bZhtKVVdX56mnnsrUqVPLX1VVVTnrrLPy+9//PknStWvXNG7cOBMnTiyvN2fOnEybNi09evRIknTv3j01NTV55JFHyjUPP/xwampqatVMmzYtc+bMKdfcc889adq0abp27VrE5gIAAADUK3V6TanFixfnb3/7W/nxjBkzMnXq1LRu3To77rhj2rRpU6u+cePG6dChQ3bbbbckSWVlZQYOHJihQ4emTZs2ad26dYYNG5YuXbqU78a3++6756ijjspJJ52Ua665Jkly8sknp2/fvuXn6d27d/bYY49UV1fn0ksvzfz58zNs2LCcdNJJZj8BAAAAbAJ1OlPqscceyz777JN99tknSXLmmWdmn332yXe+8531fo4rr7wyxx13XE444YT07Nkz22yzTX7729+mYcOG5ZpbbrklXbp0Se/evdO7d+988pOfzM0331xe3rBhw9x1111p1qxZevbsmRNOOCHHHXdcLrvsso23sQAAAACU1elMqV69eqVUKq13/cyZM9caa9asWcaMGZMxY8a843qtW7fOuHHj3vW5d9xxx9x5553r3QsAAAAA799me00pAAAAALZeQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACteorhsAgI1h53PuqusW3tXMS46p6xYAAGCzYqYUAAAAAIUTSgEAAABQOKEUAAAAAIUTSgEAAABQOKEUAAAAAIUTSgEAAABQuEZ13QAA8H92Pueuum7hXc285Ji6bgEAgK2EmVIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFK5OQ6kHHnggxx57bKqqqlJRUZE77rijvGzFihX5z//8z3Tp0iUtWrRIVVVVvvKVr2T27Nm1nmPZsmU5/fTT07Zt27Ro0SL9+vXLyy+/XKtmwYIFqa6uTmVlZSorK1NdXZ2FCxfWqnnppZdy7LHHpkWLFmnbtm0GDx6c5cuXb6pNBwAAAKjX6jSUWrJkSfbaa6+MHTt2rWVvvPFGHn/88VxwwQV5/PHH86tf/SovvPBC+vXrV6tuyJAhuf322zN+/PhMmjQpixcvTt++fbNq1apyTf/+/TN16tRMmDAhEyZMyNSpU1NdXV1evmrVqhxzzDFZsmRJJk2alPHjx+eXv/xlhg4duuk2HgAAAKAe
a1SXL3700Ufn6KOPXueyysrKTJw4sdbYmDFjcsABB+Sll17KjjvumJqamlx//fW5+eabc8QRRyRJxo0bl44dO+bee+9Nnz598txzz2XChAl56KGH0q1btyTJtddem+7du2f69OnZbbfdcs899+TZZ5/NrFmzUlVVlSS5/PLLM2DAgIwYMSKtWrXahP8KAAAAAPXPFnVNqZqamlRUVGTbbbdNkkyZMiUrVqxI7969yzVVVVXp3LlzJk+enCR58MEHU1lZWQ6kkuTAAw9MZWVlrZrOnTuXA6kk6dOnT5YtW5YpU6YUsGUAAAAA9UudzpTaEG+++WbOOeec9O/fvzxzae7cuWnSpEm22267WrXt27fP3LlzyzXt2rVb6/natWtXq6Z9+/a1lm+33XZp0qRJuWZdli1blmXLlpUfL1q06P1tHAAAAEA9s0WEUitWrMgXvvCFrF69Oj/84Q/fs75UKqWioqL8+K3//0Fq3m7UqFG58MIL37MfAKhvdj7nrrpu4V3NvOSYum4BAKDe2+xP31uxYkVOOOGEzJgxIxMnTqx1facOHTpk+fLlWbBgQa115s2bV5751KFDh7zyyitrPe+rr75aq+btM6IWLFiQFStWrDWD6q3OPffc1NTUlL9mzZr1vrcTAAAAoD7ZrEOpNYHUX//619x7771p06ZNreVdu3ZN48aNa10Qfc6cOZk2bVp69OiRJOnevXtqamryyCOPlGsefvjh1NTU1KqZNm1a5syZU66555570rRp03Tt2vUd+2vatGlatWpV6wsAAACA91anp+8tXrw4f/vb38qPZ8yYkalTp6Z169apqqrK5z73uTz++OO58847s2rVqvJsptatW6dJkyaprKzMwIEDM3To0LRp0yatW7fOsGHD0qVLl/Ld+HbfffccddRROemkk3LNNdckSU4++eT07ds3u+22W5Kkd+/e2WOPPVJdXZ1LL7008+fPz7Bhw3LSSScJmgAAAAA2gToNpR577LEceuih5cdnnnlmkuSrX/1qhg8fnt/85jdJkr333rvWen/605/Sq1evJMmVV16ZRo0a5YQTTsjSpUtz+OGH58Ybb0zDhg3L9bfccksGDx5cvktfv379Mnbs2PLyhg0b5q677sqgQYPSs2fPNG/ePP37989ll122KTYbAAAAoN6r01CqV69eKZVK77j83Zat0axZs4wZMyZjxox5x5rWrVtn3Lhx7/o8O+64Y+688873fD0AAAAAPrjN+ppSAAAAAGydhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFK5RXTcAALA52/mcu+q6hXc185Jj6roFAID3xUwpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAAonlAIAAACgcI3qugEAADa9nc+5q65beFczLzmmrlsAAApmphQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhRNKAQAAAFA4oRQAAAAAhavTUOqBBx7Isccem6qqqlRUVOSOO+6otbxUKmX48OGpqqpK8+bN06tXrzzzzDO1apYtW5bTTz89bdu2TYsWLdKvX7+8/PLLtWoWLFiQ6urqVFZWprKyMtXV1Vm4cGGtmpdeeinHHntsWrRokbZt22bw4MFZvnz5pthsAAAAgHqvTkOpJUuWZK+99srYsWPXuXz06NG54oorMnbs2Dz66KPp0KFDjjzyyLz++uvl
miFDhuT222/P+PHjM2nSpCxevDh9+/bNqlWryjX9+/fP1KlTM2HChEyYMCFTp05NdXV1efmqVatyzDHHZMmSJZk0aVLGjx+fX/7ylxk6dOim23gAAACAeqxRXb740UcfnaOPPnqdy0qlUq666qqcf/75Of7445MkN910U9q3b59bb701p5xySmpqanL99dfn5ptvzhFHHJEkGTduXDp27Jh77703ffr0yXPPPZcJEybkoYceSrdu3ZIk1157bbp3757p06dnt912yz333JNnn302s2bNSlVVVZLk8ssvz4ABAzJixIi0atWqgH8NAAAAgPpjs72m1IwZMzJ37tz07t27PNa0adMccsghmTx5cpJkypQpWbFiRa2aqqqqdO7cuVzz4IMPprKyshxIJcmBBx6YysrKWjWdO3cuB1JJ0qdPnyxbtixTpkzZpNsJAAAAUB/V6UypdzN37twkSfv27WuNt2/fPi+++GK5pkmTJtluu+3Wqlmz/ty5c9OuXbu1nr9du3a1at7+Otttt12aNGlSrlmXZcuWZdmyZeXHixYtWt/NAwAAAKjXNtuZUmtUVFTUelwqldYae7u316yr/v3UvN2oUaPKF0+vrKxMx44d37UvAAAAAP5tsw2lOnTokCRrzVSaN29eeVZThw4dsnz58ixYsOBda1555ZW1nv/VV1+tVfP211mwYEFWrFix1gyqtzr33HNTU1NT/po1a9YGbiUAAABA/bTZnr63yy67pEOHDpk4cWL22WefJMny5ctz//335/vf/36SpGvXrmncuHEmTpyYE044IUkyZ86cTJs2LaNHj06SdO/ePTU1NXnkkUdywAEHJEkefvjh1NTUpEePHuWaESNGZM6cOdl+++2TJPfcc0+aNm2arl27vmOPTZs2TdOmTTfNPwAAAGvZ+Zy76rqFdzXzkmPqugUA2GLUaSi1ePHi/O1vfys/njFjRqZOnZrWrVtnxx13zJAhQzJy5Mh06tQpnTp1ysiRI7PNNtukf//+SZLKysoMHDgwQ4cOTZs2bdK6desMGzYsXbp0Kd+Nb/fdd89RRx2Vk046Kddcc02S5OSTT07fvn2z2267JUl69+6dPfbYI9XV1bn00kszf/78DBs2LCeddJI77wEAAABsAnUaSj322GM59NBDy4/PPPPMJMlXv/rV3HjjjTn77LOzdOnSDBo0KAsWLEi3bt1yzz33pGXLluV1rrzyyjRq1CgnnHBCli5dmsMPPzw33nhjGjZsWK655ZZbMnjw4PJd+vr165exY8eWlzds2DB33XVXBg0alJ49e6Z58+bp379/Lrvssk39TwAAAABQL9VpKNWrV6+USqV3XF5RUZHhw4dn+PDh71jTrFmzjBkzJmPGjHnHmtatW2fcuHHv2suOO+6YO++88z17BgAAAOCD22wvdA4AAADA1ksoBQAAAEDhhFIAAAAAFK5OrykFAAD11c7n3FXXLbyrmZccU9ctALCVE0oBAADvm3ANgPdLKAUAANR7wjWA4rmmFAAAAACFE0oBAAAAUDin7wEAAGxFnIoIbCnMlAIAAACgcEIpAAAAAAonlAIAAACgcEIpAAAAAArnQucAAABsdlywHbZ+ZkoBAAAAUDihFAAAAACFE0oBAAAAUDihFAAAAACFc6FzAAAA2ERcsB3emZlSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4RrVdQMAAADA5m/nc+6q6xbe1cxLjqnrFthAZkoBAAAAULgNmik1YMCAvPDCC+tdv8cee+S6667b4KYAAAAA2LptUCj11FNP5fHHH1/v+gMOOGCDGwIAAABg6+f0PQAAAAAKJ5QCAAAAoHBCKQAAAAAKJ5QCAAAAoHAbdKHzUqmUr3/96+tdWyqV3ldTAAAAAGzdNiiUuuOOO/Lmm2+ud33z5s03uCEAAAAAtn4bFEpNmTIl//rXv9a7vl27dtlxxx03uCkAAAAAtm4b
dE2piy++OM2aNUvTpk3X62vkyJGbqm8AAAAAtmAbfE2pr3zlK+tdP3bs2A1uCAAAAICt3wbNlKqoqNigJ9/QegAAAADqhw2aKQUAAACwJdv5nLvquoV3NfOSY+q6hcJs0EwpAAAAANgYNviaUg888MB615ZKpffVFAAAAABbtw0Kpb7+9a/n7rvvXu/6AQMGbGg/AAAAANQDGxRKfeMb38jq1avXu75BA2cHAgAAALC2DQqlDjjggGy77bbrVVsqlfLGG2/k4Ycffj99AQAAALAV2+BrSv3xj39c7/r9999/gxsCAAAAYOu3QefXVVRUbNCTb2g9AAAAAPWDiz4BAAAAULjNOpRauXJlvv3tb2eXXXZJ8+bN89GPfjQXXXRRrYutl0qlDB8+PFVVVWnevHl69eqVZ555ptbzLFu2LKeffnratm2bFi1apF+/fnn55Zdr1SxYsCDV1dWprKxMZWVlqqurs3DhwiI2EwAAAKDe2axDqe9///v58Y9/nLFjx+a5557L6NGjc+mll2bMmDHlmtGjR+eKK67I2LFj8+ijj6ZDhw458sgj8/rrr5drhgwZkttvvz3jx4/PpEmTsnjx4vTt2zerVq0q1/Tv3z9Tp07NhAkTMmHChEydOjXV1dWFbi8AAABAfbFBFzpv06ZNevTosd71bdu23eCG3urBBx/MZz7zmRxzzDFJkp133jm33XZbHnvssST/niV11VVX5fzzz8/xxx+fJLnpppvSvn373HrrrTnllFNSU1OT66+/PjfffHOOOOKIJMm4cePSsWPH3HvvvenTp0+ee+65TJgwIQ899FC6deuWJLn22mvTvXv3TJ8+PbvtttsH2g4AAAAAatugUGq//fbLzJkz17t+11133dB+ajnooIPy4x//OC+88EI+/vGP58knn8ykSZNy1VVXJUlmzJiRuXPnpnfv3uV1mjZtmkMOOSSTJ0/OKaeckilTpmTFihW1aqqqqtK5c+dMnjw5ffr0yYMPPpjKyspyIJUkBx54YCorKzN58uR3DKWWLVuWZcuWlR8vWrToA20vAAAAQH2xQaHU73//+9xxxx0plUrrVf/5z38+3/ve995XY0nyn//5n6mpqcknPvGJNGzYMKtWrcqIESPyxS9+MUkyd+7cJEn79u1rrde+ffu8+OKL5ZomTZpku+22W6tmzfpz585Nu3bt1nr9du3alWvWZdSoUbnwwgvf9/YBAAAA1FcbFEqVSqXsuOOOG1T/Qfz85z/PuHHjcuutt2bPPffM1KlTM2TIkFRVVeWrX/1qua6iomKt13372Lp6e2vNuurf63nOPffcnHnmmeXHixYtSseOHd9zuwAAAADquw0Kpd4r6Pmg9W931lln5ZxzzskXvvCFJEmXLl3y4osvZtSoUfnqV7+aDh06JPn3TKftt9++vN68efPKs6c6dOiQ5cuXZ8GCBbVmS82bN698fawOHTrklVdeWev1X3311bVmYb1V06ZN07Rp0w+0jQAAAAD10WZ997033ngjDRrUbrFhw4ZZvXp1kmSXXXZJhw4dMnHixPLy5cuX5/777y8HTl27dk3jxo1r1cyZMyfTpk0r13Tv3j01NTV55JFHyjUPP/xwampqNujC7gAAAACsnw2aKVW0Y489NiNGjMiOO+6YPffcM0888USuuOKKfP3rX0/y75lYQ4YMyciRI9OpU6d06tQpI0eOzDbbbJP+/fsnSSorKzNw4MAMHTo0bdq0SevWrTNs2LB06dKlfDe+3XffPUcddVROOumkXHPNNUmSk08+OX379nXnPQAAAIBNYIOvKXXRRRetd+0HNWbMmFxwwQUZNGhQ5s2bl6qqqpxyyin5zne+U645++yzs3Tp0gwaNCgLFixIt27dcs8996Rly5blmiuvvDKNGjXKCSeckKVLl+bwww/PjTfemIYNG5ZrbrnllgwePLh8l75+/fpl7NixH3gbAAAAAFjbBoVSP/zhD7No0aL1ru/Tp88GN/RW
LVu2zFVXXZWrrrrqHWsqKioyfPjwDB8+/B1rmjVrljFjxmTMmDHvWNO6deuMGzfuA3QLAAAAwPraoFCqe/fum6oPAAAAAOqRzfpC5wAAAABsnYRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABRusw+l/vnPf+bLX/5y2rRpk2222SZ77713pkyZUl5eKpUyfPjwVFVVpXnz5unVq1eeeeaZWs+xbNmynH766Wnbtm1atGiRfv365eWXX65Vs2DBglRXV6eysjKVlZWprq7OwoULi9hEAAAAgHpnsw6lFixYkJ49e6Zx48a5++678+yzz+byyy/PtttuW64ZPXp0rrjiiowdOzaPPvpoOnTokCOPPDKvv/56uWbIkCG5/fbbM378+EyaNCmLFy9O3759s2rVqnJN//79M3Xq1EyYMCETJkzI1KlTU11dXeTmAgAAANQbjeq6gXfz/e9/Px07dswNN9xQHtt5553L/18qlXLVVVfl/PPPz/HHH58kuemmm9K+ffvceuutOeWUU1JTU5Prr78+N998c4444ogkybhx49KxY8fce++96dOnT5577rlMmDAhDz30ULp165Ykufbaa9O9e/dMnz49u+22W3EbDQAAAFAPbNYzpX7zm99kv/32y+c///m0a9cu++yzT6699try8hkzZmTu3Lnp3bt3eaxp06Y55JBDMnny5CTJlClTsmLFilo1VVVV6dy5c7nmwQcfTGVlZTmQSpIDDzwwlZWV5RoAAAAANp7NOpT6xz/+kR/96Efp1KlTfv/73+fUU0/N4MGD87Of/SxJMnfu3CRJ+/bta63Xvn378rK5c+emSZMm2W677d61pl27dmu9frt27co167Js2bIsWrSo1hcAAAAA722zPn1v9erV2W+//TJy5MgkyT777JNnnnkmP/rRj/KVr3ylXFdRUVFrvVKptNbY2729Zl317/U8o0aNyoUXXrhe2wIAAADA/9msZ0ptv/322WOPPWqN7b777nnppZeSJB06dEiStWYzzZs3rzx7qkOHDlm+fHkWLFjwrjWvvPLKWq//6quvrjUL663OPffc1NTUlL9mzZq1gVsIAAAAUD9t1qFUz549M3369FpjL7zwQnbaaackyS677JIOHTpk4sSJ5eXLly/P/fffnx49eiRJunbtmsaNG9eqmTNnTqZNm1au6d69e2pqavLII4+Uax5++OHU1NSUa9aladOmadWqVa0vAAAAAN7bZn363hlnnJEePXpk5MiROeGEE/LII4/kJz/5SX7yk58k+fcpd0OGDMnIkSPTqVOndOrUKSNHjsw222yT/v37J0kqKyszcODADB06NG3atEnr1q0zbNiwdOnSpXw3vt133z1HHXVUTjrppFxzzTVJkpNPPjl9+/Z15z0AAACATWCzDqX233//3H777Tn33HNz0UUXZZdddslVV12VL33pS+Was88+O0uXLs2gQYOyYMGCdOvWLffcc09atmxZrrnyyivTqFGjnHDCCVm6dGkOP/zw3HjjjWnYsGG55pZbbsngwYPLd+nr169fxo4dW9zGAgAAANQjm3UolSR9+/ZN375933F5
RUVFhg8fnuHDh79jTbNmzTJmzJiMGTPmHWtat26dcePGfZBWAQAAAFhPm/U1pQAAAADYOgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACjcFhVKjRo1KhUVFRkyZEh5rFQqZfjw4amqqkrz5s3Tq1evPPPMM7XWW7ZsWU4//fS0bds2LVq0SL9+/fLyyy/XqlmwYEGqq6tTWVmZysrKVFdXZ+HChQVsFQAAAED9s8WEUo8++mh+8pOf5JOf/GSt8dGjR+eKK67I2LFj8+ijj6ZDhw458sgj8/rrr5drhgwZkttvvz3jx4/PpEmTsnjx4vTt2zerVq0q1/Tv3z9Tp07NhAkTMmHChEydOjXV1dWFbR8AAABAfbJFhFKLFy/Ol770pVx77bXZbrvtyuOlUilXXXVVzj///Bx//PHp3Llzbrrpprzxxhu59dZbkyQ1NTW5/vrrc/nll+eII47IPvvsk3HjxuXpp5/OvffemyR57rnnMmHChFx33XXp3r17unfvnmuvvTZ33nlnpk+fXifbDAAAALA12yJCqW9+85s55phjcsQRR9QanzFjRubOnZvevXuXx5o2bZpDDjkkkydPTpJMmTIlK1asqFVTVVWVzp07l2sefPDBVFZWplu3buWaAw88MJWVleWadVm2bFkWLVpU6wsAAACA99aorht4L+PHj8/jjz+eRx99dK1lc+fOTZK0b9++1nj79u3z4osvlmuaNGlSa4bVmpo168+dOzft2rVb6/nbtWtXrlmXUaNG5cILL9ywDQIAAABg854pNWvWrHzrW9/KuHHj0qxZs3esq6ioqPW4VCqtNfZ2b69ZV/17Pc+5556bmpqa8tesWbPe9TUBAAAA+LfNOpSaMmVK5s2bl65du6ZRo0Zp1KhR7r///vzXf/1XGjVqVJ4h9fbZTPPmzSsv69ChQ5YvX54FCxa8a80rr7yy1uu/+uqra83CequmTZumVatWtb4AAAAAeG+bdSh1+OGH5+mnn87UqVPLX/vtt1++9KUvZerUqfnoRz+aDh06ZOLEieV1li9fnvvvvz89evRIknTt2jWNGzeuVTNnzpxMmzatXNO9e/fU1NTkkUceKdc8/PDDqampKdcAAAAAsPFs1teUatmyZTp37lxrrEWLFmnTpk15fMiQIRk5cmQ6deqUTp06ZeTIkdlmm23Sv3//JEllZWUGDhyYoUOHpk2bNmndunWGDRuWLl26lC+cvvvuu+eoo47KSSedlGuuuSZJcvLJJ6dv377ZbbfdCtxiAAAAgPphsw6l1sfZZ5+dpUuXZtCgQVmwYEG6deuWe+65Jy1btizXXHnllWnUqFFOOOGELF26NIcffnhuvPHGNGzYsFxzyy23ZPDgweW79PXr1y9jx44tfHsAAAAA6oMtLpS67777aj2uqKjI8OHDM3z48Hdcp1mzZhkzZkzGjBnzjjWtW7fOuHHjNlKXAAAAALybzfqaUgAAAABsnYRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRS
AAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4YRSAAAAABROKAUAAABA4TbrUGrUqFHZf//907Jly7Rr1y7HHXdcpk+fXqumVCpl+PDhqaqqSvPmzdOrV68888wztWqWLVuW008/PW3btk2LFi3Sr1+/vPzyy7VqFixYkOrq6lRWVqaysjLV1dVZuHDhpt5EAAAAgHppsw6l7r///nzzm9/MQw89lIkTJ2blypXp3bt3lixZUq4ZPXp0rrjiiowdOzaPPvpoOnTokCOPPDKvv/56uWbIkCG5/fbbM378+EyaNCmLFy9O3759s2rVqnJN//79M3Xq1EyYMCETJkzI1KlTU11dXej2AgAAANQXjeq6gXczYcKEWo9vuOGGtGvXLlOmTMnBBx+cUqmUq666Kueff36OP/74JMlNN92U9u3b59Zbb80pp5ySmpqaXH/99bn55ptzxBFHJEnGjRuXjh075t57702fPn3y3HPPZcKECXnooYfSrVu3JMm1116b7t27Z/r06dltt92K3XAAAACArdxmPVPq7WpqapIkrVu3TpLMmDEjc+fOTe/evcs1TZs2zSGHHJLJkycnSaZMmZIVK1bUqqmqqkrnzp3LNQ8++GAqKyvLgVSSHHjggamsrCzXAAAAALDxbNYzpd6qVCrlzDPPzEEHHZTOnTsnSebOnZskad++fa3a9u3b58UXXyzXNGnSJNttt91aNWvWnzt3btq1a7fWa7Zr165csy7Lli3LsmXLyo8XLVr0PrYMAAAAoP7ZYmZKnXbaaXnqqady2223rbWsoqKi1uNSqbTW2Nu9vWZd9e/1PKNGjSpfGL2ysjIdO3Z8r80AAAAAIFtIKHX66afnN7/5Tf70pz9lhx12KI936NAhSdaazTRv3rzy7KkOHTpk+fLlWbBgwbvWvPLKK2u97quvvrrWLKy3Ovfcc1NTU1P+mjVr1vvbQAAAAIB6ZrMOpUqlUk477bT86le/yh//+MfssssutZbvsssu6dChQyZOnFgeW758ee6///706NEjSdK1a9c0bty4Vs2cOXMybdq0ck337t1TU1OTRx55pFzz8MMPp6amplyzLk2bNk2rVq1qfQEAAADw3jbra0p985vfzK233ppf//rXadmyZXlGVGVlZZo3b56KiooMGTIkI0eOTKdOndKpU6eMHDky22yzTfr371+uHThwYIYOHZo2bdqkdevWGTZsWLp06VK+G9/uu++eo446KieddFKuueaaJMnJJ5+cvn37uvMeAAAAwCawWYdSP/rRj5IkvXr1qjV+ww03ZMCAAUmSs88+O0uXLs2gQYOyYMGCdOvWLffcc09atmxZrr/yyivTqFGjnHDCCVm6dGkOP/zw3HjjjWnYsGG55pZbbsngwYPLd+nr169fxo4du2k3EAAAAKCe2qxDqVKp9J41FRUVGT58eIYPH/6ONc2aNcuYMWMyZsyYd6xp3bp1xo0b937aBAAAAGADbdbXlAIAAABg6ySUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAA
CieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUAgAAAKBwQikAAAAACieUepsf/vCH2WWXXdKsWbN07do1f/7zn+u6JQAAAICtjlDqLX7+859nyJAhOf/88/PEE0/kU5/6VI4++ui89NJLdd0aAAAAwFZFKPUWV1xxRQYOHJj/+I//yO67756rrroqHTt2zI9+9KO6bg0AAABgqyKU+l/Lly/PlClT0rt371rjvXv3zuTJk+uoKwAAAICtU6O6bmBz8a9//SurVq1K+/bta423b98+c+fOXec6y5Yty7Jly8qPa2pqkiSLFi3adI0WYPWyN+q6hfe0vv/Gm/u22I7Nz9ayLfVtO5KtZ1tsRzF8b21+6tt2JFvPttiOYvje2vzUt+1Itp5t2Vq2Y3O2ZhtKpdK71lWU3quinpg9e3Y+8pGPZPLkyenevXt5fMSIEbn55pvz/PPPr7XO8OHDc+GFFxbZJgAAAMAWYdasWdlhhx3ecbmZUv+rbdu2adiw4VqzoubNm7fW7Kk1zj333Jx55pnlx6tXr878+fPTpk2bVFRUbNJ+tySLFi1Kx44dM2vWrLRq1aqu26EA9nn9ZL/XT/Z7/WS/10/2e/1kv9dP9nv9s7H3ealUyuuvv56qqqp3rRNK/a8mTZqka9eumThxYj772c+WxydOnJjPfOYz61ynadOmadq0aa2xbbfddlO2uUVr1aqVN7R6xj6vn+z3+sl+r5/s9/rJfq+f7Pf6yX6vfzbmPq+srHzPGqHUW5x55pmprq7Ofvvtl+7du+cnP/lJXnrppZx66ql13RoAAADAVkUo9RYnnnhiXnvttVx00UWZM2dOOnfunN/97nfZaaed6ro1AAAAgK2KUOptBg0alEGDBtV1G1uVpk2b5rvf/e5apzqy9bLP6yf7vX6y3+sn+71+st/rJ/u9frLf65+62ufuvgcAAABA4RrUdQMAAAAA1D9CKQAAAAAKJ5QCAAAAoHBCKQAAAAAKJ5QCAAAAoHBCKerM6tWr67oFNrFXXnklK1asqOs2qANu7ApQv3jfh/rD8V5/rF69OqtWrao1trH3v1CKQsyYMSPXXXddrrrqqtxzzz1JkgYNGnhD24o98cQT2X777TNp0qS6boUCrVy5MklSUVFRHnOcb/2effbZPPjgg3XdBnXo9ddfz+zZs/P666+X/xjhj09bv8ceeyxf+cpXktR+32fr5nivnxzv9c9zzz2XQYMGpU+fPrngggvy61//Osm/9//G/HwvlGKTmzZtWvbbb7+MHz8+l156ac4444wcdthhWbJkyUb/hmbz8OSTT+aQQw7JGWeckUMPPbSu26Egzz77bE499dQcdthhGTZsWK0fXGy9nnzyyXTu3Dl//vOf67oV6sjTTz+dQw89NIcffni6du2aQYMGZfr06WnQoMFaf11l67HmZ33Lli3ruhUK5Hivnxzv9c/zzz+fHj16ZPHixdlll13yl7/8JWeccUYuuOCCJBs3mBJKsUm98cYbOfXUU3PiiSfm3nvvzTPPPJPLL788r776arp165ZXXnklFRUV/rqyFZk2bVp69OiRb33rW7n88stTKpXy/PPP5w9/+EP+8Y9/1HV7bCLPP/98evbsmdWrV+ejH/1o/v73v+eLX/xiRowYUa4RQG99nnzyyXTv3j1nn312zj777Lpuhzrw0ksv5fDDD0/Pnj3zox/9KAMGDMjMmTNz+OGH54knnkjDhg39oroVevLJJ9OjR48MGjQoP/jBD9ZZ47Pd1sfxXj85
3uufUqmUn/zkJ+ndu3fGjRuXa6+9Nj/72c8yePDgXH755eXPfBvtD88l2ITmz59f6tKlS+lXv/pVeWz16tWlF154obTvvvuW9t5771rjbNnefPPNUt++fUsNGjQoj336058ude3atVRRUVHaa6+9SgMHDqzDDtlUhg4dWvrMZz5Tfvzqq6+WfvCDH5QaN25cOv/888vjjvOtxwsvvFCqqKgoXXTRRaVSqVRauXJl6dZbby2df/75peuuu6704IMP1nGHFOF//ud/Sj179iy98cYb5bEnnnii9NnPfra07bbblp588slSqeTY35rMnj27tM0225QGDBhQKpX+/bP/rLPOKn3mM58pHXTQQaVLL720NGvWrFKpZL9vbRzv9Y/jvf469thjS8cff3ytsUWLFpV+8IMflFq3bl264oorNtprmSnFJtWqVausXr06f/rTn8pjFRUV6dSpU2644Ya88cYbOe2008rjbNkaN26c8847L506dUrPnj3Tu3fvNGjQIJdffnmmTZuWr371q3nooYfMqNjKlEql/OMf/0iTJk3KY23bts2pp56aH//4xxk1alR++MMfJnGcby1KpVL5enGdOnVKkhxxxBG5/PLLc/vtt+fKK6/MgAEDMn78+LpskwIsXLgwU6dOzZIlS8pje++9d0aNGpVevXrl61//embPnu3Y34rMnj07+++/fx577LH87W9/y/HHH58HH3wwu+yySz7ykY/kF7/4Rc4444zybHi2Ho73+sfxXn8dfPDBmTt3bl544YXyWMuWLXPCCSdk4MCBueOOOzJ37tyN8lpCKTaZUqmUhg0b5vOf/3wee+yx/O53v6u1vEuXLvniF7+Yp556Km+88UYddcnG1KBBg3Tv3j233HJLFi5cmPnz5+eaa67JIYcckj322CPf+MY3csghh2Ty5MlZvHhxXbfLRlJRUZGDDz44U6dOzXPPPVceb9CgQfr3759vf/vb+dGPfpQZM2bUYZdsTBUVFfn85z+fSy+9NP37988OO+yQtm3b5rbbbsszzzyT8ePH5+CDD85ll12WF198sa7bZRPab7/9suuuu+ZXv/pVli1bVh7fbbfd8o1vfCMrV67ME088UYcdsrF17do1V1xxRaqqqvLxj388SXLHHXfkyiuvzPjx43PKKadk6tSpefzxx+u4UzY2x3v943ivv7p27ZrZs2fnlltuyfz588vjbdu2zWc+85k89NBDG+2zvVCKTWZNWl5dXZ1SqZQf/OAHue+++2ot32OPPTJ79uxaf3Fhy7fvvvvmlltuyYgRI9KuXbsk/z7XvFmzZtlpp52yaNGiNGjg7Wdrst9++6WysjI33HBDXn755fJ4s2bNctRRR+Wf//xn5syZU4cdsrF96EMfyje/+c1cdtll+djHPlaeJZkknTt3zuc+97lMmzbNft/K7bXXXtl9991z9dVXZ/LkybWuJ9O7d+8sW7Ysf/zjH+uwQzam0v9eG3DffffNRRddlGHDhmXo0KFp06ZNed8PHDgwNTU1eeihh+qyVTYBx3v94niv3w499NAMHTo0I0aMyI9//OPMnj27vKxTp07ZfffdN9prNdpozwTrUCqV8tGPfjQ/+clP0r9//4wePTozZ87MgAEDsmzZsjzyyCOpqqpK8+bN67pVNqKKiop88pOfTJJy+LTmv3/961+z1157pXHjxnXWHxvfQQcdlP79++fqq69Os2bNMmDAgHz0ox9N8u+/oO6www61/qrK1qFZs2b5j//4jxxxxBH5xCc+keTfAXSDBg3Stm3b7Lrrrtl2223rtkk2mTX7+rbbbku3bt1y6qmn5sorr8yRRx5Zfo/fdddds8MOO9Rxp2wsa+62VFFRkW7dumX77bfP9ttvnyTli1zPnz8/H//4x8ufA9g6ON7rjzXHuOO9/lqz30877bQsX748w4cPz4svvph+/fqlc+fOGTt2bP71r39l55133iivV1EquR0SH9zKlStTKpVqBQ1rfnit
+e+zzz6bb3/723n66aezdOnS7Lrrrnnqqafyxz/+MXvvvXfdNc8HsuZN673Mnz8/l112Wa677rrcd9992WOPPQrojo1t1apVadiwYa39vuYYT5KRI0fmZz/7WfbZZ5987WtfK4fSt956azmEZsvz8ssv59VXX80+++yz3uucffbZuf/++3P33XendevWm7A76tLKlSvTqNG//8Z52GGHZd68eTnggANy4IEH5sknn8y4cePyyCOPZLfddqvjTinK8OHDc+utt2bixInZaaed6rodNiLH+9ZtyZIladGixQat43jfer318/24ceNy0003ZfLkydl5552zZMmS3H777Rv0ufDdCKX4wJ599tlceOGFmT17dnbdddf07t07X/ziF5P83y+wa76p//Wvf2XmzJm5++67s8MOO+RTn/pUdt111zreAjbUkiVLsnr16pRKpbRq1eo96ydMmJDbbrstf/jDH/Lb3/52o72BUazHH388Q4YMyd13373Wh5a3/uC66aabcvvtt+c3v/lN9txzzyxevDi/+tWv7Pct1DPPPJOjjz46n//853P55ZeX39ffyfPPP59rrrkmN954Y+6//35/Pd1KvfWYf+svqiNGjMjkyZMzc+bM7LDDDhk9enT22muvumyVgtx999353e9+l5tvvjn33XefPzhuoaZPn55//etf6dmz5zqXO963TtOnT8/FF1+cUaNGrddsN8f7lu+ll17K008/nTlz5uSYY45Jq1at0qJFi1p/eH7rZ75XX301r7zySlasWJGqqqq0b99+o/UilOIDeeGFF3LAAQfk2GOPTadOnfKHP/whr7/+evbaa6/ccMMNSZLly5fXuisXW7Znn302Z5xxRvmNafTo0fnSl770jjNnkn/PspgwYUIOO+yw8ildbFmefPLJ9OzZM6ecckouv/zy8vhb9/tbP6guWbIkM2bMKJ/GtebaYmxZnnzyyfTo0SPt27fP0qVL8+STT661L9/6PTBt2rSMHTs2jzzySG644Qa/nGwFpk+fnmuuuSazZ8/O3nvvnd69e2ffffdNUntG9JpTPdaMv/7662natGmaNWtWl+3zPs2YMSN33HFHXn755RxwwAE58cQT16p5+8/673//+5k8eXJGjBiRzp07F9kuG8nUqVPTs2fPXHLJJTn99NPXWeN43/o8+eST6d69e95888389Kc/zYABA9aqcbxvXZ566qn07t07VVVVmTFjRlq2bJkTTzwxgwYNyi677JJSqZRSqVTYNYCFUrxvpVIpF1xwQaZPn57//u//TpK88cYbueGGG3LNNddk9913z89//vNy/Q033JAjjjgiHTt2rKuW+YCeffbZHHzwwfnKV75Svj3smDFj8sgjj6zzLyQ33HBDDjvssOy0005r/TBjy/HUU0+lR48eGTRoUEaPHl0ef/PNN8sfQNf3NE62HGs+pJ5xxhk544wz0qtXr3zxi1/Meeedl6T2tWXe6vHHH09VVVU6dOhQF22zET377LPp0aNHPvWpT2XbbbfNvffem06dOuW4447LmWeemaR2GD1z5syNdn0J6s7TTz+dT3/60/nEJz6RN954Iw899FAuueSSnHXWWeusf+t+r6mpSWVlZYHdsrGs+SPEN77xjVx22WXrrHnrZznH+9Zhzc/6008/PatXr87DDz+cX/ziF+/4M9zxvuVbuHBhjjjiiBx22GE599xzs9122+Wiiy7Kvffem+222y6XX355rTOZrrzyyqxatSrDhg3bdE2V4AMYMGBA6aCDDqo19sYbb5Suu+660j777FM655xzSqVSqfSXv/yltOuuu5a+/OUvl1auXFkXrfIBvfbaa6XevXuXBg8eXGv80EMPLY+tXr26PD5p0qRSp06dSl/60pdKK1asqLWMLcecOXNKHTp0KPXp06dUKpVKK1euLJ1++umlPn36lHbZZZfSRRddVHr88cfL9d///vdLF110UV21y0by5JNPlpo2bVo677zzSqVS
qbRq1arS5z73udL++++/zvrRo0eXhg8fXmSLbGLLly8vfeUrXykNHDiwPPbiiy+WTj311NK+++5buvjii2vVX3bZZaUjjjii9NhjjxXdKhvRzJkzS7vuumvp7LPPLn9eu/7660sdOnQo/fWvf12rfs1+f/jhh4tulY3ohRdeKDVt2rR0/vnnl0qlfx//v/rVr0pjxowpjR8/vvTKK6/Uqne8bx0ee+yxUqtWrco/62+77bZSZWVladKkSaVS6d8/+9/K8b51ePHFF0s77bRT6fe//32t8Ztuuql08MEHl/r371+aM2dOqVQqlRYuXFg64ogjSr169SrNnz9/k/Vk2gLvS+kttwhdtWpVnn/++fKy5s2b5/Of/3yOPPLI/OlPf8qCBQvSo0ePnH322bnooove9VokbL5WrFiRhQsX5nOf+1ySf/+1LEk++tGP5rXXXkuSWjMmevbsmbPOOivf+9730qhRI7NotmDdu3fPa6+9ll//+tfp27dvnnvuuXTt2jX/7//9v/ziF7/IJZdckunTp2fhwoV5/PHHc/fdd2f+/Pl13TYfwLJly3L22WdnxIgR5b+MX3zxxXnhhRfyox/9qFbt/PnzM2XKlPz+978vvxew5WvcuHHmzJlT/nlfKpWy44475jvf+U4OPvjg3HnnnbnlllvK9W3bts3y5cs36jUmKNbq1aszfvz47LrrrjnvvPPKn9cOOOCANG7cuHwL+Ldas9/dxGLLtXLlyowdOzYf+tCHyrPeP/OZz+TCCy/M1Vdfnerq6vzHf/xH7rvvvvI6jvct35IlS3LIIYdk4MCBGTFiRJLkC1/4Qvbbb7985zvfycqVK9c6w8HxvnVo2LBhmjdvntmzZyf593tAknzlK1/Jl770pUybNi333HNPkqSysjI/+9nPcvPNN2e77bbbZD05fY8P5O9//3sOPPDAHHvssbn66qvTsmXL8rI5c+Zkhx12yC9+8Yv8v//3/+qwSzaWv/71r+nUqVOSf4dUjRs3zne/+93MmDEjP/vZz8p1pvNuXebMmZNzzjknv/jFL/KpT30q48ePL99N7Y477sipp56aq6++OieeeGJmzpyZpk2blm8ZzNahVCpl0aJFGTBgQJo0aZJbb721/GG1oqLCft/KrFq1KqtXr84pp5yShQsX5tZbb03Tpk3L15d46aWXcuqpp6Zx48b59a9/XV5v0aJF63XzCzZfDzzwQO6+++6MGjWqPLZ69ersuuuu+elPf5pevXqttY79vuX761//mssuuyxPPfVU/vnPf+aTn/xkrrjiinzsYx/Lc889ly984Qv5xCc+kf/5n/8pr2O/b/neeiremgtaX3fddbn00ktz2223Zd99913r8hv2+9ahX79+mTVrVv70pz9l2223rXUq/uc///n885//zOTJkwu7/IqZUnwgH/vYx/KLX/wit956a84999z861//Ki9r0qRJ9tlnn7Rp06YOO2RjWhNIrV69Oo0bN07y7x9ir7zySrlm1KhRuf7668upO1u+7bffPqNGjcqZZ56Z8847L61bty7PlDvuuOPSpk2bPPDAA0mSnXfeWTCxFaqoqEhlZWWqq6vz3//933nooYfKsx9LpZL9vpVYMxOmYcOGady4cb761a/mN7/5TX7yk5+koqKifFHzHXfcMRdeeGF++9vfZurUqeXZVH5R2TK9dQbUwQcfXA6k1uzXNRe1XrFiRbnuD3/4Q+bOnZvEft9SvXW/d+rUKWeffXY6deqUvfbaK1deeWU+/vGPp2HDhuncuXOuvvrq/OpXv8rTTz9d/vlvv2+Z3rrfd9ppp/L/r5kZ+YUvfCFLly7NT3/60yQpBxLe57dcS5Ysyeuvv55FixaVx37605+mpqYmJ5xwQpYvX14OpJKkT58+KZVKWb58eWHXAxZK8YEdeuih+e///u9cd911Ofnkk3PbbbflmWeeyaWXXpqXX345H/vYx+q6RTayBg0a1PqwuuYH2Xe+852cf/75
Ofzww2u9ubHlq6qqytlnn50ePXok+b/vgQULFqRNmzbZb7/96rhDitC3b98ceeSR+dGPfpSlS5fWugMTW7YXXnghV111VebMmVMeO+SQQ/L9738/Z5xxRq677rok//cLyoc+9KHsscce2WabbXwPbMHWtd/f+vN95cqVeeONN9KgQYPyL6PnnXdejjzyyHI4wZZnXfv9Yx/7WC6++OKcdtpp5dkzpf+9A9ebb76Zj3/842nfvr2b1mzB3r7f3/7evWrVqnzoQx/KOeeckwkTJmTKlCnlZd7nt0zPPvtsjj/++BxyyCHZfffdc8stt2T16tVp27Ztbr311jz//PPp3bt3pk+fnjfffDNJ8sgjj6Rly5Yp8oQ6vzWyURx77LGZPHlyzjzzzJxzzjlp1KhRGjdunLvvvtvd9rZSpf+961bDhg3TsWPHXHbZZRk9enQee+wxt4HfSr39lMyKiopceeWVmTNnTg499NA66ooiNWnSJIceemhGjRqVmpqaNG/evK5bYiP429/+lu7du2fBggV57bXXcuaZZ6Zt27ZJkm984xtZsmRJTj755MycOTOf/exns9NOO+VnP/tZli5d6lTtLdg77fe3/vLZoEGDNGzYMKVSKY0aNcr3vve9/Nd//Vcefvhh15XZQr3b8b7jjjumY8eO5e+BNf994IEHssMOO6Rp06Z11jcfzLvt9zXW/JG5W7duefPNN/Pwww+na9euddEuG8G67pr+ta99LXvssUf22WefHHjggfnd736X/v3755hjjsl2222X7bffPvfdd1/+/Oc/F3q8u6YUG9WiRYsyf/78LF68OB06dFjrzY6tz4gRI3LBBRekVatWuffee82YqSfGjx+f++67L7/4xS/yhz/8Ifvss09dt8QmtiaIXrBgQY488sj8z//8j9uBbwWWLFmSwYMHZ/Xq1dlvv/1y+umnZ9iwYTnrrLPy4Q9/OMm/T9m+5ZZbcvbZZ5dnzLz++uv57W9/69jfQr3Tfj/77LPX+dlt3333TaNGjfLkk0/mL3/5i5/1W6j12e9r3uuTZNq0aRk/fnzGjBmTSZMmpUuXLnXZPu/Thh7vSTJgwIA89NBDefrpp92waAs0f/78fPGLX8wnPvGJXH311eXxww47LF26dMnVV19d61j/wQ9+kJdffjnNmzfPiSeemN12263Qfs2UYqNq1aqVc43rmT59+uSCCy7I5MmTs8cee9R1OxRkjz32yLhx4/LnP/85e+65Z123QwHWfHDZdtttc//996dFixZ13BEbQ4MGDdK1a9e0adMmJ554Yj784Q/nC1/4QpKUg6kGDRqkuro6n/rUp/LSSy9l6dKl6dy5cz7ykY/Ucfe8X++239/6i+qqVatSU1OTf/zjH1m8eHGeeOIJwcQWbH32+5r3+pkzZ+ass87KCy+8kPvvv99+34Kt7/Ge/F8o+Y1vfCPf/e53y9eQZcuyrrumN2jQYK27pq+5wP03v/nNumzXTCngg1uyZIlfUOuh5cuXp0mTJnXdBvABvf09/Oc//3m++MUvZujQofnP//zPtG3bNitXrszs2bOz44471mGnbEzvtt/POeectGnTJitXrkxNTU0ee+yx7LDDDv4IsRVYn/2+atWqzJ8/P0uWLEmDBg0c91uB9dnvq1evzsyZM/PRj360DjtlY1nfu6a//vrradmyZZLaMyWLZKYU8IEJpOongRRsHda8h69atSoNGjTIiSeemFKplP79+6eioiJDhgzJZZddlhdffDE/+9nPXNx8K7G++33mzJkZN25cttlmmzrumI1hfff7jBkzctttt6VZs2Z13DEbw4a8z998881p3ry59/kt3PreNb1p06YZPHhwnZ6mKZQCAKB8QevVq1fnC1/4QioqKlJdXZ3f/OY3+fvf/55HH33UHyG2Qu+23//2t7/lscceE0hthd7reH/kkUcEUluh9Xmfd7xvXdbcMXvNHZPfetf0iy++OE888USd3zXd6XsAAJSt
+WhYUVGRww8/PFOnTs19993nmjJbOfu9frLf6yf7vX5Zc02p4cOHZ86cOenUqVO+/e1vZ/Lkydl3333ruj0zpQAA+D9rLn561lln5U9/+lOmTp3qF5V6wH6vn+z3+sl+r18aNGiQJGncuHGuvfbatGrVKpMmTdosAqkkaVDXDQAAsPnZc8898/jjj+eTn/xkXbdCgez3+sl+r5/s9/qlT58+SZLJkydnv/32q+Nu/o/T9wAAWEtd3YWHumW/10/2e/1kv9c/m+Nd04VSAAAAABTO6XsAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFE4oBQAAAEDhhFIAAAAAFK5RXTcAAEBtkydPzqBBg9a57Kijjspjjz2Wf/3rX+tc/sgjj+THP/5xfvrTn65z+be//e3st99+Oe6449a5/JOf/GR+9rOfva++AQA2hFAKAGAzs2jRohx33HEZPnx4rfGZM2fmnHPOyeLFizN16tS11uvVq1dWr16d2bNn56qrrkqvXr1qLb/xxhvzr3/9K2+++Wb23nvv3HjjjWs9x4EHHrjxNgQA4F04fQ8AAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAACicUAoAAACAwgmlAAAAAChco7puAACA2iorK3PnnXfmzjvvXGtZnz59snDhwuy3337rXLdBgwbZYYcdMmzYsHUuP++889K8efNMmzZtnc/RpUuXD9Y8AMB6qiiVSqW6bgIAAACA+sXpewAAAAAUTigFAAAAQOGEUgAAAAAUTigFAAAAQOGEUgAAAAAUTigFAAAAQOGEUgAAAAAUTigFAAAAQOGEUgAAAAAU7v8DttsBrT1E0TUAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 1200x600 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "热门文章统计信息：\n",
      "Top20文章总点击量：234,909\n",
      "Top20文章占总点击量的14.41%\n",
      "平均每篇文章点击量：11745.45\n",
      "\n",
      "Top20热门文章点击量明细：\n",
      " 1. 文章ID:   272143, 点击次数: 15935\n",
      " 2. 文章ID:   234698, 点击次数: 15666\n",
      " 3. 文章ID:   123909, 点击次数: 15383\n",
      " 4. 文章ID:   336221, 点击次数: 15170\n",
      " 5. 文章ID:    96210, 点击次数: 14009\n",
      " 6. 文章ID:   336223, 点击次数: 13998\n",
      " 7. 文章ID:   183176, 点击次数: 13277\n",
      " 8. 文章ID:   168623, 点击次数: 13041\n",
      " 9. 文章ID:   162655, 点击次数: 11968\n",
      "10. 文章ID:   331116, 点击次数: 11511\n",
      "11. 文章ID:    64329, 点击次数: 10647\n",
      "12. 文章ID:   199198, 点击次数:  9942\n",
      "13. 文章ID:   235616, 点击次数:  9681\n",
      "14. 文章ID:   336245, 点击次数:  9653\n",
      "15. 文章ID:   160974, 点击次数:  9615\n",
      "16. 文章ID:   124749, 点击次数:  9530\n",
      "17. 文章ID:   336220, 点击次数:  9346\n",
      "18. 文章ID:   233717, 点击次数:  9158\n",
      "19. 文章ID:   289090, 点击次数:  8918\n",
      "20. 文章ID:   156560, 点击次数:  8461\n"
     ]
    }
   ],
   "source": [
    "def get_item_topk_click(click_df, k):\n",
    "    \"\"\"Return the k most-clicked articles together with their click counts.\n",
    "\n",
    "    Args:\n",
    "        click_df (pd.DataFrame): Click log with a 'click_article_id' column.\n",
    "        k (int): How many of the most-clicked articles to keep.\n",
    "\n",
    "    Returns:\n",
    "        pd.Series: Click counts in descending order, indexed by article ID,\n",
    "            cut down to the first k entries.\n",
    "    \"\"\"\n",
    "    # value_counts() orders articles from most to least clicked, so the\n",
    "    # leading k rows are exactly the k hottest articles.\n",
    "    return click_df['click_article_id'].value_counts().head(k)\n",
    "\n",
    "def analyze_article_popularity(click_df, top_k=20):\n",
    "    \"\"\"Chart and summarise how clicks concentrate on the most popular articles.\n",
    "\n",
    "    Draws a bar chart of the click counts of the top_k articles, then prints\n",
    "    their combined clicks, their share of all clicks and their mean click count.\n",
    "\n",
    "    Args:\n",
    "        click_df (pd.DataFrame): Click log with a 'click_article_id' column.\n",
    "        top_k (int): Number of most-clicked articles to chart and summarise.\n",
    "\n",
    "    Returns:\n",
    "        pd.Series: Whatever get_item_topk_click returns for (click_df, top_k),\n",
    "            i.e. the click counts of the hottest articles keyed by article ID.\n",
    "    \"\"\"\n",
    "    hottest = get_item_topk_click(click_df, top_k)\n",
    "\n",
    "    # Bar chart: x is the 0-based rank position, y is the click count.\n",
    "    rank_positions = range(len(hottest))\n",
    "    plt.figure(figsize=(12, 6))\n",
    "    plt.bar(rank_positions, hottest.values)\n",
    "    plt.title(f'热门文章点击量分布(Top{top_k})')\n",
    "    plt.xlabel('文章排名')\n",
    "    plt.ylabel('点击次数')\n",
    "    plt.xticks(rotation=45)\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    # Share of the whole click log that lands on the charted articles.\n",
    "    clicks_overall = click_df['click_article_id'].count()\n",
    "    clicks_in_top = hottest.sum()\n",
    "    share_pct = clicks_in_top / clicks_overall * 100\n",
    "\n",
    "    print(f'\\n热门文章统计信息：')\n",
    "    print(f'Top{top_k}文章总点击量：{clicks_in_top:,}')\n",
    "    print(f'Top{top_k}文章占总点击量的{share_pct:.2f}%')\n",
    "    # The mean below is taken over the charted articles only, not every article.\n",
    "    print(f'平均每篇文章点击量：{hottest.mean():.2f}')\n",
    "\n",
    "    return hottest\n",
    "\n",
    "# Run the popularity analysis on the full click log.\n",
    "print('分析热门文章...')\n",
    "top_articles = analyze_article_popularity(all_click_df, top_k=20)\n",
    "\n",
    "# List every charted article. The heading takes its number from the result,\n",
    "# so it cannot drift from top_k or overstate it when fewer articles exist.\n",
    "print(f'\\nTop{len(top_articles)}热门文章点击量明细：')\n",
    "for i, (article_id, clicks) in enumerate(top_articles.items(), 1):\n",
    "    print(f'{i:2d}. 文章ID: {article_id:8d}, 点击次数: {clicks:5d}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b722a057",
   "metadata": {},
   "source": [
    "# 基于物品的协同过滤算法\n",
    "\n",
    "实现基于ItemCF的文章相似度计算：\n",
    "1. 构建用户-文章交互矩阵\n",
    "2. 计算文章之间的相似度\n",
    "3. 使用用户序列长度进行惩罚"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "0393fca6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def itemcf_sim(df):\n",
    "    \"\"\"Build the item-to-item similarity table used by ItemCF and pickle it.\n",
    "\n",
    "    For every user, each ordered pair of different articles in that user's\n",
    "    click sequence adds 1 / log(len(sequence) + 1) to the pair's co-occurrence\n",
    "    weight, so users with long sequences contribute less per pair. Each weight\n",
    "    is then divided by sqrt(item_cnt[i] * item_cnt[j]), where item_cnt counts\n",
    "    how many times an article appears across all sequences.\n",
    "\n",
    "    Note: the score is not capped at 1. Every entry of a sequence is visited,\n",
    "    so an article that occurs several times in one sequence is counted once\n",
    "    per occurrence.\n",
    "\n",
    "    Args:\n",
    "        df: Click data, passed unchanged to get_user_item_time.\n",
    "\n",
    "    Returns:\n",
    "        dict: {item_i: {item_j: similarity}}. The same table is also pickled\n",
    "            to save_path + 'itemcf_i2i_sim.pkl'.\n",
    "    \"\"\"\n",
    "    # 1. Per-user click sequences, iterated below as {user: [(item, time), ...]}.\n",
    "    user_item_time_dict = get_user_item_time(df)\n",
    "\n",
    "    # 2. Accumulate co-occurrence weights and per-item occurrence counts.\n",
    "    cooccur = {}\n",
    "    item_cnt = defaultdict(int)\n",
    "\n",
    "    print('正在计算文章共现频率...')\n",
    "    for user, item_time_list in tqdm(user_item_time_dict.items()):\n",
    "        seq_len = len(item_time_list)\n",
    "        if seq_len == 0:\n",
    "            # Nothing to count, and log(1) == 0 would make the weight undefined.\n",
    "            continue\n",
    "\n",
    "        # The length penalty is identical for every pair of this user, so it\n",
    "        # is computed once per user instead of once per pair.\n",
    "        pair_weight = 1 / math.log(seq_len + 1)\n",
    "\n",
    "        for i, _ in item_time_list:\n",
    "            item_cnt[i] += 1\n",
    "            neighbours = cooccur.setdefault(i, {})\n",
    "\n",
    "            for j, _ in item_time_list:\n",
    "                if i == j:\n",
    "                    continue\n",
    "                neighbours[j] = neighbours.get(j, 0) + pair_weight\n",
    "\n",
    "    # 3. Normalise into a fresh table. A shallow dict.copy() would share the\n",
    "    #    inner dicts and silently overwrite the raw co-occurrence weights.\n",
    "    print('正在计算最终相似度...')\n",
    "    i2i_sim_ = {}\n",
    "    for i, related_items in tqdm(cooccur.items()):\n",
    "        i2i_sim_[i] = {\n",
    "            j: wij / math.sqrt(item_cnt[i] * item_cnt[j])\n",
    "            for j, wij in related_items.items()\n",
    "        }\n",
    "\n",
    "    # 4. Persist the table; the with-block guarantees the file is flushed and\n",
    "    #    closed even if pickling fails.\n",
    "    print('正在保存相似度矩阵...')\n",
    "    with open(save_path + 'itemcf_i2i_sim.pkl', 'wb') as f:\n",
    "        pickle.dump(i2i_sim_, f)\n",
    "\n",
    "    return i2i_sim_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "4072c4fa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "开始计算文章相似度矩阵...\n",
      "正在计算文章共现频率...\n",
      "正在计算文章共现频率...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 250000/250000 [00:15<00:00, 16645.29it/s]\n",
      "100%|██████████| 250000/250000 [00:15<00:00, 16645.29it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "正在计算最终相似度...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 35380/35380 [00:02<00:00, 14051.23it/s]\n",
      "100%|██████████| 35380/35380 [00:02<00:00, 14051.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "正在保存相似度矩阵...\n",
      "\n",
      "分析相似度矩阵...\n",
      "\n",
      "分析相似度矩阵...\n",
      "相似度矩阵统计信息：\n",
      "总物品数：35380\n",
      "平均相似度：0.0108\n",
      "相似度矩阵统计信息：\n",
      "总物品数：35380\n",
      "平均相似度：0.0108\n",
      "最大相似度：1.9494\n",
      "最小相似度：0.0001\n",
      "最大相似度：1.9494\n",
      "最小相似度：0.0001\n",
      "相似度标准差：0.0376\n",
      "相似度标准差：0.0376\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAHUCAYAAAD8ySMAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjNUlEQVR4nO3de5CU5Znw4XtkZCAGhoBioBgOEgUBNQhGEIi4CkqQClUbTCxDUJOqNYsHQrkJuO6umJjBWnXNrhGjRWCprOC6CnE1HrAU0CAGCGhcjecDq7AUSZgBUrZh5v3+yOdUxhkOPUC/0M91VfUf/fbT3fc81b5VP/tARZZlWQAAACTiqLwHAAAAKCURBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJCUyrwHAODI9corr8TIkSP3uuaZZ56JMWPG7HXN2rVrY/fu3ft8rCNh3YknnrjXNQDkTwQB0GYNDQ0xZMiQePbZZ1u9ffTo0VFRUbHPNQ0NDfv1WEfCOgAOfz4OBwAAJEUEAQAASRFBAABAUkQQAACQlLKJoFWrVsWkSZOiZ8+eUVFREcuWLSv6MbIsi1tuuSVOOumkqKqqipqamvjhD3948IcFAAByUza/Drdr16447bTT4rLLLou//uu/btNjXHPNNfHEE0/ELbfcEqecckrU1dXFtm3bDvKkAABAnsomgiZMmBATJkzY4+0fffRRXH/99fEf//EfsX379hgyZEjcfPPNMXbs2Ij48791MW/evHjppZdiwIABJZoaAAAotbL5ONy+XHbZZfHLX/4ylixZEi+++GJMmTIlLrjggnj99dcjIuK///u/44QTToiHH344+vXrF3379o1vfetb8fvf/z7nyQEAgIMpiQh68803Y/HixXH//ffHmDFjon///nHttdfG6NGjY8GCBRER8dZbb8W7774b999/fyxatCgWLlwY69evj6985Ss5Tw8AABxMZfNxuL359a9/HVmWxUknndTseKFQiG7dukVERGNjYxQKhVi0aFHTuvnz58ewYcPi1Vdf9RE5AAAoE0lEUGNjY7Rr1y7Wr18f7dq1a3bbpz/96YiI6NGjR1RWVjYLpZNPPjkiIt577z0RBAAAZSKJCBo6dGg0NDTE1q1bY8yYMa2uGTVqVOzevTvefPPN6N+/f0REvPbaaxER0adPn5LNCgAAHFplE0E7d+6MN954o+n622+/HRs3boyuXbvGSSedFJdcckl84xvfiFtvvTWGDh0a27Zti6eeeipOOeWU+NKXvhTnnXdenH766XH55ZfH7bffHo2NjTF9+vQYN25ci4/RAfBn7dq1ixdeeCG6dOnS6u0NDQ3R0NCwzzVHHXXUfj3WkbAOgMNfRZZlWd5DHAwrVqyIc845p8XxadOmxcKFC+NPf/pT/OAHP4hFixbF+++/H926dYuRI0fGnDlz4pRTTomIiA8++CCuuuqqeOKJJ+KYY46JCRMmxK233hpdu3Yt9Z8DAAAcImUTQQAAAPsj1/ftd+/eHddff33069cvOnbsGCeccELceOON0djYmOdYAABAGcv1O0E333xz3HXXXfHv//7vMXjw4Fi3bl1cdtllUV1dHddcc02eowEAAGUq1wh67rnn4stf/nJMnDgxIiL69u0bixcvjnXr1u3X/RsbG+ODDz6ITp06RUVFxaEcFQAAOIxlWRY7duyInj177vOHanKNoNGjR8ddd90Vr732Wpx00knxwgsvxLPPPhu33357q+sLhUIUCoWm6++//34MGjSoRNMCAACHu02bNkWvXr32uibXCPre974XdXV1MXDgwGjXrl00NDTETTfdFBdffHGr62tra2POnDktjm/atCk6d+58qMcFAAAOU/X19VFTUxOdOnXa59pcfx1uyZIl8Xd/93fxz//8zzF48ODYuHFjzJgxI2677baYNm1ai/WffCfo4z+0rq5OBAEA
QMLq6+ujurp6v9og1wiqqamJWbNmxfTp05uO/eAHP4if/exn8dvf/naf9y/mDwUAAMpXMW2Q609k//GPf2zxpaV27dr5iWwAAOCQyfU7QZMmTYqbbropevfuHYMHD44NGzbEbbfdFpdffnmeYwEAAGUs14/D7dixI/7hH/4hli5dGlu3bo2ePXvGxRdfHP/4j/8Y7du33+f9fRwOAACIOIK+E3SgRBAAABBxBH0nCAAAoNREEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJCUyrwHKCd9Zz3Spvu9M3fiQZ4EAADYE+8EAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUnKNoL59+0ZFRUWLy/Tp0/McCwAAKGOVeT752rVro6Ghoen6Sy+9FOPGjYspU6bkOBUAAFDOco2g4447rtn1uXPnRv/+/ePss8/OaSIAAKDc5RpBf+mjjz6Kn/3sZzFz5syoqKhodU2hUIhCodB0vb6+vlTjAQAAZeKw+WGEZcuWxfbt2+PSSy/d45ra2tqorq5uutTU1JRuQAAAoCwcNhE0f/78mDBhQvTs2XOPa2bPnh11dXVNl02bNpVwQgAAoBwcFh+He/fdd+PJJ5+MBx98cK/rqqqqoqqqqkRTAQAA5eiweCdowYIF0b1795g4cWLeowAAAGUu9whqbGyMBQsWxLRp06Ky8rB4YwoAAChjuUfQk08+Ge+9915cfvnleY8CAAAkIPe3XsaPHx9ZluU9BgAAkIjc3wkCAAAoJREEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQlNwj6P3334+vf/3r0a1bt/jUpz4Vn//852P9+vV5jwUAAJSpyjyf/A9/+EOMGjUqzjnnnHj00Ueje/fu8eabb0aXLl3yHAsAAChjuUbQzTffHDU1NbFgwYKmY3379s1vIAAAoOzl+nG4hx56KIYPHx5TpkyJ7t27x9ChQ+Oee+7Z4/pCoRD19fXNLgAAAMXINYLeeuutmDdvXpx44onx+OOPxxVXXBFXX311LFq0qNX1tbW1UV1d3XSpqakp8cQAAMCRriLLsiyvJ2/fvn0MHz48Vq9e3XTs6quvjrVr18Zzzz3XYn2hUIhCodB0vb6+PmpqaqKuri46d+5ckpn3pu+sR9p0v3fmTjzIkwAAQFrq6+ujurp6v9og13eCevToEYMGDWp27OSTT4733nuv1fVVVVXRuXPnZhcAAIBi5BpBo0aNildffbXZsddeey369OmT00QAAEC5yzWCvvOd78SaNWvihz/8Ybzxxhtx7733xt133x3Tp0/PcywAAKCM5RpBZ5xxRixdujQWL14cQ4YMie9///tx++23xyWXXJLnWAAAQBnL9d8Jioi48MIL48ILL8x7DAAAIBG5vhMEAABQaiIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoI
AgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKblG0A033BAVFRXNLp/97GfzHAkAAChzlXkPMHjw4HjyySebrrdr1y7HaQAAgHKXewRVVlZ69wcAACiZ3L8T9Prrr0fPnj2jX79+8bWvfS3eeuutPa4tFApRX1/f7AIAAFCMXCPozDPPjEWLFsXjjz8e99xzT2zZsiXOOuus+N3vftfq+tra2qiurm661NTUlHhiAADgSFeRZVmW9xAf27VrV/Tv3z+++93vxsyZM1vcXigUolAoNF2vr6+PmpqaqKuri86dO5dy1Fb1nfVIm+73ztyJB3kSAABIS319fVRXV+9XG+T+naC/dMwxx8Qpp5wSr7/+equ3V1VVRVVVVYmnAgAAyknu3wn6S4VCIV555ZXo0aNH3qMAAABlKtcIuvbaa2PlypXx9ttvx/PPPx9f+cpXor6+PqZNm5bnWAAAQBnL9eNw//u//xsXX3xxbNu2LY477rgYMWJErFmzJvr06ZPnWAAAQBnLNYKWLFmS59MDAAAJOqy+EwQAAHCoiSAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKZXFLN69e3c0Njbu9/qjjjoqKiuLegoAAIBDqqhC+cIXvhBdunSJLMv2uq6ioiKyLItdu3bFr371qwMaEAAA4GAqKoKyLIunnnpqv9efccYZRQ8EAABwKBX1naCKioqiHryY9bW1tVFRUREzZswo6jkAAACKcVj8MMLatWvj7rvvjlNPPTXvUQAAgDKXewTt3LkzLrnkkrjnnnviM5/5TN7jAAAAZS73CJo+fXpMnDgxzjvvvH2uLRQKUV9f3+wCAABQjKJ+GKFbt25x1lln7ff6Y489dq+3L1myJH7961/H2rVr9+vxamtrY86cOfv9/AAAAJ9UVAQNHz483nnnnf1e/7nPfW6Pt23atCmuueaaeOKJJ6JDhw779XizZ8+OmTNnNl2vr6+Pmpqa/Z4HAACgqAh6/PHHY9myZfv8d4I+NmXKlPj+97/f6m3r16+PrVu3xrBhw5qONTQ0xKpVq+KOO+6IQqEQ7dq1a3afqqqqqKqqKmZkAACAZor+d4J69+5d1Po9Offcc+M3v/lNs2OXXXZZDBw4ML73ve+1CCAAAICDoagIOpj/TlCnTp1iyJAhzY4dc8wx0a1btxbHAQAADpbcfx0OAACglIp6J+hQW7FiRd4jAAAAZa7o7wTdeOON+70WAADgcFNUBN15551F/QOl559/ftEDAQAAHEpFRdDIkSMP1RwAAAAl4YcRAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICm5RtC8efPi1FNPjc6dO0fnzp1j5MiR8eijj+Y5EgAAUOZyjaBevXrF3LlzY926dbFu3br4q7/6q/jyl78c//M//5PnWAAAQBmrzPPJJ02a1Oz6TTfdFPPmzYs1a9bE4MGDc5oKAAAoZ7lG0F9qaGiI+++/P3bt2hUj
R45sdU2hUIhCodB0vb6+vlTjAQAAZSL3H0b4zW9+E5/+9Kejqqoqrrjiili6dGkMGjSo1bW1tbVRXV3ddKmpqSnxtAAAwJEu9wgaMGBAbNy4MdasWRPf/va3Y9q0afHyyy+3unb27NlRV1fXdNm0aVOJpwUAAI50uX8crn379vG5z30uIiKGDx8ea9eujR/96Efxk5/8pMXaqqqqqKqqKvWIAABAGcn9naBPyrKs2fd+AAAADqZc3wm67rrrYsKECVFTUxM7duyIJUuWxIoVK+Kxxx7LcywAAKCM5RpB//d//xdTp06NzZs3R3V1dZx66qnx2GOPxbhx4/IcCwAAKGO5RtD8+fPzfHoAACBBh913ggAAAA4lEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJCUXCOotrY2zjjjjOjUqVN07949Jk+eHK+++mqeIwEAAGUu1whauXJlTJ8+PdasWRPLly+P3bt3x/jx42PXrl15jgUAAJSxyjyf/LHHHmt2fcGCBdG9e/dYv359fPGLX8xpKgAAoJzlGkGfVFdXFxERXbt2bfX2QqEQhUKh6Xp9fX1J5gIAAMrHYfPDCFmWxcyZM2P06NExZMiQVtfU1tZGdXV106WmpqbEUwIAAEe6wyaCrrzyynjxxRdj8eLFe1wze/bsqKura7ps2rSphBMCAADl4LD4ONxVV10VDz30UKxatSp69eq1x3VVVVVRVVVVwskAAIByk2sEZVkWV111VSxdujRWrFgR/fr1y3McAAAgAblG0PTp0+Pee++Nn//859GpU6fYsmVLRERUV1dHx44d8xwNAAAoU7l+J2jevHlRV1cXY8eOjR49ejRd7rvvvjzHAgAAyljuH4cDAAAopcPm1+EAAABKQQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJEUEAAEBSRBAAAJAUEQQAACRFBAEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkJdcIWrVqVUyaNCl69uwZFRUVsWzZsjzHAQAAEpBrBO3atStOO+20uOOOO/IcAwAASEhlnk8+YcKEmDBhQp4jAAAAick1gopVKBSiUCg0Xa+vr89xGgAA4Eh0RP0wQm1tbVRXVzddampq8h4JAAA4whxRETR79uyoq6trumzatCnvkQAAgCPMEfVxuKqqqqiqqsp7DAAA4Ah2RL0TBAAAcKByfSdo586d8cYbbzRdf/vtt2Pjxo3RtWvX6N27d46TAQAA5SrXCFq3bl2cc845TddnzpwZERHTpk2LhQsX5jQVAABQznKNoLFjx0aWZXmOAAAAJMZ3ggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABIiggCAACSIoIAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIggAAAgKSIIAABISmXeAxDRd9Yjbb7vO3MnHsRJAACg/Hkn
CAAASIoIAgAAkiKCAACApIggAAAgKSIIAABISu4RdOedd0a/fv2iQ4cOMWzYsHjmmWfyHgkAAChjuf5E9n333RczZsyIO++8M0aNGhU/+clPYsKECfHyyy9H79698xztiNHWn9f209oAAKSqIsuyLK8nP/PMM+P000+PefPmNR07+eSTY/LkyVFbW7vP+9fX10d1dXXU1dVF586dD+Wo++VA/r2fI4V4AgDgcFRMG+T2TtBHH30U69evj1mzZjU7Pn78+Fi9enWr9ykUClEoFJqu19XVRcSf/+DDQWPhj3mPcMj1/s79eY9wWHppzvl5jwAAkLSPm2B/3uPJLYK2bdsWDQ0Ncfzxxzc7fvzxx8eWLVtavU9tbW3MmTOnxfGamppDMiPsr+rb854AAICIiB07dkR1dfVe1+T6naCIiIqKimbXsyxrcexjs2fPjpkzZzZdb2xsjN///vfRrVu3Pd6nVOrr66OmpiY2bdp0WHw0r5zZ69Kwz6Vhn0vHXpeGfS4de10a9rk0DsY+Z1kWO3bsiJ49e+5zbW4RdOyxx0a7du1avOuzdevWFu8OfayqqiqqqqqaHevSpcuhGrFNOnfu7D+QErHXpWGfS8M+l469Lg37XDr2ujTsc2kc6D7v6x2gj+X2E9nt27ePYcOGxfLly5sdX758eZx11lk5TQUAAJS7XD8ON3PmzJg6dWoMHz48Ro4cGXfffXe89957ccUVV+Q5FgAAUMZyjaCvfvWr8bvf/S5uvPHG2Lx5cwwZMiR+8YtfRJ8+ffIcq02qqqrin/7pn1p8XI+Dz16Xhn0uDftcOva6NOxz6djr0rDPpVHqfc713wkCAAAotdy+EwQAAJAHEQQAACRFBAEAAEkRQQAAQFJE0F7ceeed0a9fv+jQoUMMGzYsnnnmmb2uX7lyZQwbNiw6dOgQJ5xwQtx1110t1jzwwAMxaNCgqKqqikGDBsXSpUsP1fhHjGL2+cEHH4xx48bFcccdF507d46RI0fG448/3mzNwoULo6KiosXlww8/PNR/ymGtmH1esWJFq3v429/+ttk6r+fWFbPXl156aat7PXjw4KY1XtMtrVq1KiZNmhQ9e/aMioqKWLZs2T7v4xxdvGL32Tm67Yrda+fptil2n52j26a2tjbOOOOM6NSpU3Tv3j0mT54cr7766j7vV8rztAjag/vuuy9mzJgRf//3fx8bNmyIMWPGxIQJE+K9995rdf3bb78dX/rSl2LMmDGxYcOGuO666+Lqq6+OBx54oGnNc889F1/96ldj6tSp8cILL8TUqVPjoosuiueff75Uf9Zhp9h9XrVqVYwbNy5+8YtfxPr16+Occ86JSZMmxYYNG5qt69y5c2zevLnZpUOHDqX4kw5Lxe7zx1599dVme3jiiSc23eb13Lpi9/pHP/pRsz3etGlTdO3aNaZMmdJsndd0c7t27YrTTjst7rjjjv1a7xzdNsXus3N02xW71x9zni5OsfvsHN02K1eujOnTp8eaNWti+fLlsXv37hg/fnzs2rVrj/cp+Xk6o1Vf+MIXsiuuuKLZsYEDB2azZs1qdf13v/vdbODAgc2O/c3f/E02YsSIpusXXXRRdsEFFzRbc/7552df+9rXDtLUR55i97k1gwYNyubMmdN0fcGCBVl1dfXBGrEsFLvPTz/9dBYR2R/+8Ic9PqbXc+sO9DW9dOnSrKKiInvnnXeajnlN711EZEuXLt3rGufoA7c/+9wa5+ji7c9eO08fuLa8pp2j22br1q1ZRGQrV67c45pSn6e9E9SKjz76KNavXx/jx49vdnz8+PGxevXqVu/z3HPPtVh//vnnx7p16+JPf/rTXtfs6THLXVv2+ZMaGxtjx44d0bVr12bHd+7cGX369IlevXrFhRde2OL/QqbkQPZ56NCh0aNHjzj33HPj6aefbnab13NLB+M1PX/+/DjvvPNa/KPRXtMHxjk6H87Rh57zdGk5R7dNXV1dRESLc8FfKvV5WgS1
Ytu2bdHQ0BDHH398s+PHH398bNmypdX7bNmypdX1u3fvjm3btu11zZ4es9y1ZZ8/6dZbb41du3bFRRdd1HRs4MCBsXDhwnjooYdi8eLF0aFDhxg1alS8/vrrB3X+I0Vb9rlHjx5x9913xwMPPBAPPvhgDBgwIM4999xYtWpV0xqv55YO9DW9efPmePTRR+Nb3/pWs+Ne0wfOOTofztGHjvN06TlHt02WZTFz5swYPXp0DBkyZI/rSn2eriz6HgmpqKhodj3LshbH9rX+k8eLfcwUtHVPFi9eHDfccEP8/Oc/j+7duzcdHzFiRIwYMaLp+qhRo+L000+Pf/u3f4t//dd/PXiDH2GK2ecBAwbEgAEDmq6PHDkyNm3aFLfcckt88YtfbNNjpqSt+7Jw4cLo0qVLTJ48udlxr+mDwzm6tJyjDy3n6dJzjm6bK6+8Ml588cV49tln97m2lOdp7wS14thjj4127dq1qMqtW7e2qM+Pffazn211fWVlZXTr1m2va/b0mOWuLfv8sfvuuy+++c1vxn/+53/Geeedt9e1Rx11VJxxxhnJ/h+ZA9nnvzRixIhme+j13NKB7HWWZfHTn/40pk6dGu3bt9/r2tRf023hHF1aztH5cJ4+dJyj2+aqq66Khx56KJ5++uno1avXXteW+jwtglrRvn37GDZsWCxfvrzZ8eXLl8dZZ53V6n1GjhzZYv0TTzwRw4cPj6OPPnqva/b0mOWuLfsc8ef/u3jppZfGvffeGxMnTtzn82RZFhs3bowePXoc8MxHorbu8ydt2LCh2R56Pbd0IHu9cuXKeOONN+Kb3/zmPp8n9dd0WzhHl45zdH6cpw8d5+jiZFkWV155ZTz44IPx1FNPRb9+/fZ5n5Kfp4v+KYVELFmyJDv66KOz+fPnZy+//HI2Y8aM7Jhjjmn6NZBZs2ZlU6dObVr/1ltvZZ/61Key73znO9nLL7+czZ8/Pzv66KOz//qv/2pa88tf/jJr165dNnfu3OyVV17J5s6dm1VWVmZr1qwp+d93uCh2n++9996ssrIy+/GPf5xt3ry56bJ9+/amNTfccEP22GOPZW+++Wa2YcOG7LLLLssqKyuz559/vuR/3+Gi2H3+l3/5l2zp0qXZa6+9lr300kvZrFmzsojIHnjggaY1Xs+tK3avP/b1r389O/PMM1t9TK/plnbs2JFt2LAh27BhQxYR2W233ZZt2LAhe/fdd7Msc44+WIrdZ+fotit2r52n26bYff6Yc3Rxvv3tb2fV1dXZihUrmp0L/vjHPzatyfs8LYL24sc//nHWp0+frH379tnpp5/e7Gf9pk2blp199tnN1q9YsSIbOnRo1r59+6xv377ZvHnzWjzm/fffnw0YMCA7+uijs4EDBzY7WaWqmH0+++yzs4hocZk2bVrTmhkzZmS9e/fO2rdvnx133HHZ+PHjs9WrV5fwLzo8FbPPN998c9a/f/+sQ4cO2Wc+85ls9OjR2SOPPNLiMb2eW1fsuWP79u1Zx44ds7vvvrvVx/Oabunjnwfe07nAOfrgKHafnaPbrti9dp5um7acO5yji9faHkdEtmDBgqY1eZ+nK/7/oAAAAEnwnSAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApFTmPQAA6Vm9enX87d/+bau3XXDBBbFu3brYtm1bq7f/6le/irvuuit++tOftnr79ddfH8OHD4/Jkye3evupp54aixYtim984xvx4osvtrpm2bJl0bdv333+HQAcmUQQACVXX18fkydPjhtuuKHZ8XfeeSdmzZoVO3fujI0bN7a439ixY6OxsTE++OCDuP3222Ps2LHNbl+4cGFs27YtPvzww/j85z8fCxcubPEYI0aMiIiI1157rdXnuPTSS+PDDz9s418GwJHAx+EAAICkiCAAACApIggAAEiKCAIAAJIiggAAgKSIIAAAICkiCAAASIoIAgAAkiKCAACApIgg
AAAgKZV5DwBAeqqrq+Phhx+Ohx9+uMVt559/fmzfvj2GDx/e6n2POuqo6NWrV1x77bWt3n7ddddFx44d46WXXmr1MU455ZSIiDj55JP3+BwdO3bc3z8FgCNQRZZlWd5DAAAAlIqPwwEAAEkRQQAAQFJEEAAAkBQRBAAAJEUEAQAASRFBAABAUkQQAACQFBEEAAAkRQQBAABJ+X8Yj2Osvv0pIAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 1000x500 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def analyze_similarity_matrix(i2i_sim):\n",
    "    \"\"\"Print summary statistics of the similarity scores and plot their histogram.\n",
    "\n",
    "    Args:\n",
    "        i2i_sim (dict): similarity matrix; each value is itself a mapping whose\n",
    "            values are the similarity scores.\n",
    "\n",
    "    Returns:\n",
    "        list: all similarity scores of ``i2i_sim`` flattened into one list.\n",
    "    \"\"\"\n",
    "    # Flatten every inner mapping into a single list of scores\n",
    "    similarities = [score for sims in i2i_sim.values() for score in sims.values()]\n",
    "\n",
    "    # Summary statistics\n",
    "    print('相似度矩阵统计信息：')\n",
    "    print(f'总物品数：{len(i2i_sim)}')\n",
    "    print(f'平均相似度：{np.mean(similarities):.4f}')\n",
    "    print(f'最大相似度：{np.max(similarities):.4f}')\n",
    "    print(f'最小相似度：{np.min(similarities):.4f}')\n",
    "    print(f'相似度标准差：{np.std(similarities):.4f}')\n",
    "\n",
    "    # Histogram of the score distribution\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    ax.hist(similarities, bins=50)\n",
    "    ax.set_title('文章相似度分布')\n",
    "    ax.set_xlabel('相似度分数')\n",
    "    ax.set_ylabel('频次')\n",
    "    plt.show()\n",
    "\n",
    "    return similarities\n",
    "\n",
    "# Build the article similarity matrix\n",
    "print('开始计算文章相似度矩阵...')\n",
    "i2i_sim = itemcf_sim(all_click_df)\n",
    "\n",
    "# Inspect the distribution of its scores\n",
    "print('\\n分析相似度矩阵...')\n",
    "similarities = analyze_similarity_matrix(i2i_sim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "49f65db1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Item-based (i2i) recall\n",
    "def item_based_recommend(user_id, user_item_time_dict, i2i_sim, sim_item_topk, recall_item_num, item_topk_click):\n",
    "    \"\"\"Recall articles for one user with item-based collaborative filtering.\n",
    "\n",
    "    :param user_id: id of the user to recall articles for\n",
    "    :param user_item_time_dict: dict, click sequence per user   {user1: [(item1, time1), (item2, time2)..]...}\n",
    "    :param i2i_sim: dict, article similarity matrix   {item: {other_item: score, ...}, ...}\n",
    "    :param sim_item_topk: int, how many of the most similar articles to take per clicked article\n",
    "    :param recall_item_num: int, number of articles to return\n",
    "    :param item_topk_click: list, fallback articles used to pad a short recall list\n",
    "    :return: list of (item, score) tuples sorted by score in descending order,\n",
    "             at most recall_item_num long   [(item1, score1), (item2, score2)...]\n",
    "    :raises KeyError: if user_id has no history or a clicked article is missing from i2i_sim\n",
    "    \"\"\"\n",
    "\n",
    "    # Articles the user already interacted with; these are never recalled via similarity\n",
    "    user_hist_items = user_item_time_dict[user_id]\n",
    "    clicked_items = {item for item, _ in user_hist_items}\n",
    "\n",
    "    # Accumulate similarity scores of the neighbours of every clicked article\n",
    "    item_rank = {}\n",
    "    for i, _ in user_hist_items:\n",
    "        top_similar = sorted(i2i_sim[i].items(), key=lambda x: x[1], reverse=True)[:sim_item_topk]\n",
    "        for j, wij in top_similar:\n",
    "            if j in clicked_items:\n",
    "                continue\n",
    "            item_rank[j] = item_rank.get(j, 0) + wij\n",
    "\n",
    "    # Too few candidates: pad with the fallback articles\n",
    "    if len(item_rank) < recall_item_num:\n",
    "        for i, item in enumerate(item_topk_click):\n",
    "            # Test against the keys: an article that was already recalled must keep\n",
    "            # its real similarity score instead of being overwritten by a filler score\n",
    "            if item in item_rank:\n",
    "                continue\n",
    "            item_rank[item] = - i - 100  # any negative value ranks fillers below real recalls\n",
    "            if len(item_rank) == recall_item_num:\n",
    "                break\n",
    "\n",
    "    return sorted(item_rank.items(), key=lambda x: x[1], reverse=True)[:recall_item_num]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ac94a9f2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 250000/250000 [25:26<00:00, 163.73it/s] \n",
      "100%|██████████| 250000/250000 [25:26<00:00, 163.73it/s]\n"
     ]
    }
   ],
   "source": [
    "# Container for every user's recall result\n",
    "user_recall_items_dict = collections.defaultdict(dict)\n",
    "\n",
    "# Per-user click history built from the click log\n",
    "user_item_time_dict = get_user_item_time(all_click_df)\n",
    "\n",
    "# Load the pickled article similarity matrix inside a context manager so the\n",
    "# file handle is closed as soon as loading finishes.\n",
    "# NOTE(review): pickle.load can execute arbitrary code - only load files produced by this project.\n",
    "with open(save_path + 'itemcf_i2i_sim.pkl', 'rb') as sim_file:\n",
    "    i2i_sim = pickle.load(sim_file)\n",
    "\n",
    "# Number of most similar articles taken per clicked article\n",
    "sim_item_topk = 10\n",
    "\n",
    "# Number of articles recalled per user\n",
    "recall_item_num = 10\n",
    "\n",
    "# Fallback articles used to pad short recall lists\n",
    "item_topk_click = get_item_topk_click(all_click_df, k=50)\n",
    "\n",
    "for user in tqdm(all_click_df['user_id'].unique()):\n",
    "    user_recall_items_dict[user] = item_based_recommend(user, user_item_time_dict, i2i_sim, \n",
    "                                                        sim_item_topk, recall_item_num, item_topk_click)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "94d94fa5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 250000/250000 [00:03<00:00, 64928.20it/s]\n",
      "100%|██████████| 250000/250000 [00:03<00:00, 64928.20it/s]\n"
     ]
    }
   ],
   "source": [
    "# Flatten the per-user recall lists into [user, item, score] rows and build a DataFrame\n",
    "user_item_score_list = [\n",
    "    [user, item, score]\n",
    "    for user, items in tqdm(user_recall_items_dict.items())\n",
    "    for item, score in items\n",
    "]\n",
    "\n",
    "recall_df = pd.DataFrame(user_item_score_list, columns=['user_id', 'click_article_id', 'pred_score'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "0a3b0dd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the submission file\n",
    "def submit(recall_df, topk=5, model_name=None):\n",
    "    \"\"\"Write every user's top-k recalled articles to a CSV file, one row per user.\n",
    "\n",
    "    Args:\n",
    "        recall_df (pd.DataFrame): recall result; must contain 'user_id' and\n",
    "            'pred_score', the remaining column holds the recalled article id.\n",
    "        topk (int): number of articles written per user (columns article_1..article_<topk>).\n",
    "        model_name (str | None): prefix of the output file name; 'submit' is used when None.\n",
    "\n",
    "    Raises:\n",
    "        AssertionError: if some user has fewer than ``topk`` recalled articles.\n",
    "    \"\"\"\n",
    "    # sort_values returns a new frame, so the caller's DataFrame is left untouched\n",
    "    recall_df = recall_df.sort_values(by=['user_id', 'pred_score'])\n",
    "    recall_df['rank'] = recall_df.groupby(['user_id'])['pred_score'].rank(ascending=False, method='first')\n",
    "\n",
    "    # Every user needs at least topk recalled articles\n",
    "    max_rank_per_user = recall_df.groupby('user_id')['rank'].max()\n",
    "    assert max_rank_per_user.min() >= topk, f'every user needs at least {topk} recalled articles'\n",
    "\n",
    "    del recall_df['pred_score']\n",
    "    submit_df = recall_df[recall_df['rank'] <= topk].set_index(['user_id', 'rank']).unstack(-1).reset_index()\n",
    "\n",
    "    # rank() yields floats (1.0, 2.0, ...); turn them into ints so they map cleanly to column names\n",
    "    submit_df.columns = [int(col) if isinstance(col, (int, float)) else col\n",
    "                         for col in submit_df.columns.droplevel(0)]\n",
    "    # Name the columns as the submission format requires, for any topk\n",
    "    column_names = {'': 'user_id'}\n",
    "    column_names.update({k: f'article_{k}' for k in range(1, topk + 1)})\n",
    "    submit_df = submit_df.rename(columns=column_names)\n",
    "\n",
    "    file_prefix = model_name if model_name is not None else 'submit'\n",
    "    save_name = save_path + file_prefix + '_' + datetime.today().strftime('%m-%d') + '.csv'\n",
    "    submit_df.to_csv(save_name, index=False, header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "2f6a5d64",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test-set click log and collect its users\n",
    "tst_click = pd.read_csv(data_path + 'testA_click_log.csv')\n",
    "tst_users = tst_click['user_id'].unique()\n",
    "\n",
    "# Keep only the recall rows that belong to test-set users\n",
    "is_test_user = recall_df['user_id'].isin(tst_users)\n",
    "tst_recall = recall_df[is_test_user]\n",
    "\n",
    "# Write the submission file\n",
    "submit(tst_recall, topk=5, model_name='itemcf_baseline')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
